Preliminaries

Software versions and configurations

pip install dimcat Jinja2 colorlover GitPython plotly

import os
from git import Repo
import dimcat as dc
from ms3 import __version__ as ms3_version
# Location of the data repository; its sub-folders are loaded further below.
dataset_path = "~/all_subcorpora"
repo = Repo(dataset_path)
# Locate the Git repository containing this notebook by searching the parent
# directories of the current working directory.
notebook_repo = Repo('.', search_parent_directories=True)
notebook_repo_path = notebook_repo.git.rev_parse("--show-toplevel")
# Report the abbreviated (7-character) commit hashes of both repositories and
# the library versions, so the outputs below can be traced to an exact state.
print(f"Notebook repository '{os.path.basename(notebook_repo_path)}' @ {notebook_repo.commit().hexsha[:7]}")
print(f"Data repo '{os.path.basename(dataset_path)}' @ {repo.commit().hexsha[:7]}")
print(f"dimcat version {dc.__version__}")
print(f"ms3 version {ms3_version}")
Notebook repository 'dimcat' @ 36fcf12
Data repo 'all_subcorpora' @ f549aa9
dimcat version 0.2.0.post1.dev64+gda0a036
ms3 version 1.0.2
%load_ext autoreload
%autoreload 2
from collections import defaultdict, Counter
from fractions import Fraction
from IPython.display import HTML
import ms3
import plotly.express as px
import colorlover
import pandas as pd
import numpy as np
# Let pandas render up to 100 columns and 500 rows before truncating a table.
pd.set_option("display.max_columns", 100)
pd.set_option("display.max_rows", 500)
# NOTE(review): plotly.express has already been imported a few lines above.
import plotly.express as px
import plotly.graph_objects as go
import matplotlib.pyplot as plt
# Restrict matplotlib's logging to messages of level 'error'.
plt.set_loglevel('error')

This JavaScript adds a “Toggle Code” button that shows or hides the input of all code cells, as per http://www.eointravers.com/post/jupyter-toggle/

# Renders a "Show Code"/"Hide Code" button. The embedded script hides all
# `div.input` elements once the document is ready and toggles their
# visibility each time the button is pressed.
# NOTE(review): the script uses `$`, i.e. it presumably relies on jQuery being
# available in the front end that renders this notebook — confirm.
HTML('''<script>
  function code_toggle() {
    if (code_shown){
      $('div.input').hide('500');
      $('#toggleButton').val('Show Code')
    } else {
      $('div.input').show('500');
      $('#toggleButton').val('Hide Code')
    }
    code_shown = !code_shown
  }
  $( document ).ready(function(){
    code_shown=false;
    $('div.input').hide()
  });
</script>
<form action="javascript:code_toggle()"><input type="submit" id="toggleButton" value="Show Code"></form>''')
# Shared Plotly layout settings (white paper and plot backgrounds, narrow
# margins, font size 15), applied to figures via `fig.update_layout(**STD_LAYOUT)`.
STD_LAYOUT = {
 'paper_bgcolor': '#FFFFFF',
 'plot_bgcolor': '#FFFFFF',
 'margin': {'l': 40, 'r': 0, 'b': 0, 't': 40, 'pad': 0},
 'font': {'size': 15}
}
import colorlover
# Uncomment to preview the qualitative six-colour scales shipped with colorlover:
#for name, scales in colorlover.scales['6']['qual'].items():
#    print(name)
#    display(HTML(colorlover.to_html(scales)))
# Pair each of the six cadence labels with one colour of colorlover's
# qualitative 'Set1' scale, in the order given here.
cadence_colors = dict(zip(('HC', 'PAC', 'PC', 'IAC', 'DC', 'EC'), colorlover.scales['6']['qual']['Set1']))

def value_count_df(S, thing=None, counts='counts'):
    """Tabulate how often each distinct value occurs in a Series.

    Args:
        S: The pandas Series whose values are counted.
        thing: Name to give to the index of the result. Defaults to ``S.name``.
        counts: Name of the single column that holds the frequencies.

    Returns:
        A one-column DataFrame built from ``S.value_counts()``, indexed by the
        distinct values of ``S``.
    """
    index_name = thing if thing is not None else S.name
    frequencies = S.value_counts().rename(counts)
    table = frequencies.to_frame()
    # Label the index so that it shows what kind of thing has been counted.
    table.index.name = index_name
    return table

def color_background(x, color="#ffffb3"):
    """Build CSS background declarations for the non-missing cells of ``x``.

    Intended as a callback for ``Styler.apply``.

    Args:
        x: A pandas object offering ``.notna()``, e.g. one column handed over
            by ``Styler.apply``.
        color: CSS colour used as background for cells that hold a value.

    Returns:
        A numpy array shaped like ``x`` that contains the CSS declaration
        where ``x`` is not missing and ``None`` everywhere else.
    """
    css = f"background-color: {color};"
    is_present = x.notna().to_numpy()
    return np.where(is_present, css, None)

Data loading

# Sub-folders of `dataset_path`, each of which is loaded into the dataset.
corpus_folders = [
    'bach_solo',
    'beethoven_piano_sonatas',
    'c_schumann_lieder',
    'chopin_mazurkas',
    'corelli',
    'debussy_suite_bergamasque',
    'dvorak_silhouettes',
    'grieg_lyrical_pieces',
    'handel_keyboard',
    'jc_bach_sonatas',
    'liszt_pelerinage',
    'mahler_kindertotenlieder',
    'medtner_tales',
    'pleyel_quartets',
    'scarlatti_sonatas',
    'schubert_dances',
    'schumann_kinderszenen',
    'tchaikovsky_seasons',
    'wf_bach_sonatas',
]

# Start from an empty dataset and add the folders one by one, reporting progress.
dataset = dc.Dataset()
for folder in corpus_folders:
    print("Loading", folder)
    path = os.path.join(dataset_path, folder)
    dataset.load(directory=path)
Loading bach_solo
Loading beethoven_piano_sonatas
Loading c_schumann_lieder
Loading chopin_mazurkas
Loading corelli
Loading debussy_suite_bergamasque
Loading dvorak_silhouettes
Loading grieg_lyrical_pieces
Loading handel_keyboard
Loading jc_bach_sonatas
Loading liszt_pelerinage
Loading mahler_kindertotenlieder
Loading medtner_tales
Loading pleyel_quartets
Loading scarlatti_sonatas
Loading schubert_dances
Loading schumann_kinderszenen
Loading tchaikovsky_seasons
Loading wf_bach_sonatas
# Display the overview of everything that has been loaded into the dataset.
dataset.data
[default|all]
All corpora
-----------
View: This view is called 'default'. It
	- excludes fnames that are not contained in the metadata,
	- filters out file extensions requiring conversion (such as .xml), and
	- excludes review files and folders.

                               has   active   scores measures           notes        expanded
                          metadata     view detected detected parsed detected parsed detected parsed
corpus
bach_solo                      yes  default       68       68     68       68     68       68     68
beethoven_piano_sonatas        yes  default       87       87     87       87     87       64     64
c_schumann_lieder              yes  default       12       12     12       12     12       12     12
chopin_mazurkas                yes  default       55       55     55       55     55       55     55
corelli                        yes  default      149      149    149      149    149      149    149
debussy_suite_bergamasque      yes  default        4        4      4        4      4        4      4
dvorak_silhouettes             yes  default       12       12     12       12     12       12     12
grieg_lyrical_pieces           yes  default       66       66     66       66     66       66     66
handel_keyboard                yes  default        6        6      6        6      6        6      6
jc_bach_sonatas                yes  default       29       29     29       29     29       29     29
liszt_pelerinage               yes  default       19       19     19       19     19       19     19
mahler_kindertotenlieder       yes  default        5        5      5        5      5        5      5
medtner_tales                  yes  default       19       19     19       19     19       19     19
pleyel_quartets                yes  default        6        6      6        6      6        6      6
scarlatti_sonatas              yes  default       69       69     69       69     69       69     69
schubert_dances                yes  default      444      444    444      444    444       16     16
schumann_kinderszenen          yes  default       13       13     13       13     13       13     13
tchaikovsky_seasons            yes  default       12       12     12       12     12       12     12
wf_bach_sonatas                yes  default        9        9      9        9      9        9      9

2065/7034 files are excluded from this view.

1810 files have been excluded based on their subdir.
255 files have been excluded based on their file name.


There are 1 orphans that could not be attributed to any of the respective corpus's fnames.
#dataset = Corpus(directory=dataset_path)
#dataset.data

Filtering out pieces without cadence annotations

# Apply dimcat's HasCadenceAnnotationsFilter to the loaded dataset and report
# the number of pieces before and after filtering as an HTML heading.
hascadence = dc.HasCadenceAnnotationsFilter().process_data(dataset)
display(HTML(f"<h4>Before: {dataset.n_indices} pieces; "
             f"after removing those without cadence labels: {hascadence.n_indices}</h4>"))
INCOMPLETE_MC_WRONGLY_COMPLETED_WARNING (3, 96) ms3.Parse.beethoven_piano_sonatas.04-3 -- /home/hentsche/miniconda3/envs/dimcat/lib/python3.10/site-packages/ms3/bs4_measures.py (line 753) make_offset_col():
	The incomplete MC 96 (timesig 3/4, act_dur 1/4) is completed by 1 incorrect duration (expected: 1/2):
	{97: Fraction(3, 4)}
INCOMPLETE_MC_WRONGLY_COMPLETED_WARNING (3, 112) ms3.Parse.beethoven_piano_sonatas.04-3 -- /home/hentsche/miniconda3/envs/dimcat/lib/python3.10/site-packages/ms3/bs4_measures.py (line 753) make_offset_col():
	The incomplete MC 112 (timesig 3/4, act_dur 1/2) is completed by 1 incorrect duration (expected: 1/4):
	{25: Fraction(3, 4), 113: Fraction(1, 4)}
INCOMPLETE_MC_WRONGLY_COMPLETED_WARNING (3, 39) ms3.Parse.beethoven_piano_sonatas.13-1 -- /home/hentsche/miniconda3/envs/dimcat/lib/python3.10/site-packages/ms3/bs4_measures.py (line 753) make_offset_col():
	The incomplete MC 39 (timesig 3/4, act_dur 1/8) is completed by 1 incorrect duration (expected: 5/8):
	{40: Fraction(3, 4)}
INCOMPLETE_MC_WRONGLY_COMPLETED_WARNING (3, 47) ms3.Parse.beethoven_piano_sonatas.13-1 -- /home/hentsche/miniconda3/envs/dimcat/lib/python3.10/site-packages/ms3/bs4_measures.py (line 753) make_offset_col():
	The incomplete MC 47 (timesig 3/4, act_dur 5/8) is completed by 1 incorrect duration (expected: 1/8):
	{14: Fraction(1, 1), 48: Fraction(1, 8)}
INCOMPLETE_MC_WRONGLY_COMPLETED_WARNING (3, 267) ms3.Parse.beethoven_piano_sonatas.13-4 -- /home/hentsche/miniconda3/envs/dimcat/lib/python3.10/site-packages/ms3/bs4_measures.py (line 753) make_offset_col():
	The incomplete MC 267 (timesig 3/4, act_dur 1/2) is completed by 1 incorrect duration (expected: 1/4):
	{268: Fraction(1, 2)}
INCOMPLETE_MC_WRONGLY_COMPLETED_WARNING (3, 269) ms3.Parse.beethoven_piano_sonatas.13-4 -- /home/hentsche/miniconda3/envs/dimcat/lib/python3.10/site-packages/ms3/bs4_measures.py (line 753) make_offset_col():
	The incomplete MC 269 (timesig 3/4, act_dur 1/2) is completed by 1 incorrect duration (expected: 1/4):
	{270: Fraction(1, 2)}
INCOMPLETE_MC_WRONGLY_COMPLETED_WARNING (3, 271) ms3.Parse.beethoven_piano_sonatas.13-4 -- /home/hentsche/miniconda3/envs/dimcat/lib/python3.10/site-packages/ms3/bs4_measures.py (line 753) make_offset_col():
	The incomplete MC 271 (timesig 3/4, act_dur 1/2) is completed by 1 incorrect duration (expected: 1/4):
	{272: Fraction(1, 2)}
INCOMPLETE_MC_WRONGLY_COMPLETED_WARNING (3, 273) ms3.Parse.beethoven_piano_sonatas.13-4 -- /home/hentsche/miniconda3/envs/dimcat/lib/python3.10/site-packages/ms3/bs4_measures.py (line 753) make_offset_col():
	The incomplete MC 273 (timesig 3/4, act_dur 1/2) is completed by 1 incorrect duration (expected: 1/4):
	{274: Fraction(1, 2)}
INCOMPLETE_MC_WRONGLY_COMPLETED_WARNING (3, 275) ms3.Parse.beethoven_piano_sonatas.13-4 -- /home/hentsche/miniconda3/envs/dimcat/lib/python3.10/site-packages/ms3/bs4_measures.py (line 753) make_offset_col():
	The incomplete MC 275 (timesig 3/4, act_dur 1/2) is completed by 1 incorrect duration (expected: 1/4):
	{276: Fraction(1, 2)}
INCOMPLETE_MC_WRONGLY_COMPLETED_WARNING (3, 277) ms3.Parse.beethoven_piano_sonatas.13-4 -- /home/hentsche/miniconda3/envs/dimcat/lib/python3.10/site-packages/ms3/bs4_measures.py (line 753) make_offset_col():
	The incomplete MC 277 (timesig 3/4, act_dur 1/2) is completed by 1 incorrect duration (expected: 1/4):
	{278: Fraction(1, 2)}
INCOMPLETE_MC_WRONGLY_COMPLETED_WARNING (3, 279) ms3.Parse.beethoven_piano_sonatas.13-4 -- /home/hentsche/miniconda3/envs/dimcat/lib/python3.10/site-packages/ms3/bs4_measures.py (line 753) make_offset_col():
	The incomplete MC 279 (timesig 3/4, act_dur 1/2) is completed by 1 incorrect duration (expected: 1/4):
	{280: Fraction(1, 2)}
INCOMPLETE_MC_WRONGLY_COMPLETED_WARNING (3, 281) ms3.Parse.beethoven_piano_sonatas.13-4 -- /home/hentsche/miniconda3/envs/dimcat/lib/python3.10/site-packages/ms3/bs4_measures.py (line 753) make_offset_col():
	The incomplete MC 281 (timesig 3/4, act_dur 1/2) is completed by 1 incorrect duration (expected: 1/4):
	{282: Fraction(1, 2)}
INCOMPLETE_MC_WRONGLY_COMPLETED_WARNING (3, 283) ms3.Parse.beethoven_piano_sonatas.13-4 -- /home/hentsche/miniconda3/envs/dimcat/lib/python3.10/site-packages/ms3/bs4_measures.py (line 753) make_offset_col():
	The incomplete MC 283 (timesig 3/4, act_dur 1/2) is completed by 1 incorrect duration (expected: 1/4):
	{284: Fraction(1, 2)}
INCOMPLETE_MC_WRONGLY_COMPLETED_WARNING (3, 285) ms3.Parse.beethoven_piano_sonatas.13-4 -- /home/hentsche/miniconda3/envs/dimcat/lib/python3.10/site-packages/ms3/bs4_measures.py (line 753) make_offset_col():
	The incomplete MC 285 (timesig 3/4, act_dur 1/2) is completed by 1 incorrect duration (expected: 1/4):
	{286: Fraction(1, 2)}
MCS_NOT_EXCLUDED_FROM_BARCOUNT_WARNING (1, 268, 270, 272, 274, 276, 278, 280, 282, 284, 286) ms3.Parse.beethoven_piano_sonatas.13-4 -- /home/hentsche/miniconda3/envs/dimcat/lib/python3.10/site-packages/ms3/bs4_measures.py (line 204) check_measure_numbers():
	MCs 268, 270, 272, 274, 276, 278, 280, 282, 284, 286 seem to be offset from the MN's beginning but have not been excluded from barcount. Context:
	      mc   mn act_dur mc_offset  dont_count  numbering_offset
	266  267  266     1/2         0        <NA>              <NA>
	267  268  267     1/2       1/4        <NA>              <NA>
	268  269  268     1/2         0        <NA>              <NA>
	269  270  269     1/2       1/4        <NA>              <NA>
	270  271  270     1/2         0        <NA>              <NA>
	271  272  271     1/2       1/4        <NA>              <NA>
	272  273  272     1/2         0        <NA>              <NA>
	273  274  273     1/2       1/4        <NA>              <NA>
	274  275  274     1/2         0        <NA>              <NA>
	275  276  275     1/2       1/4        <NA>              <NA>
	276  277  276     1/2         0        <NA>              <NA>
	277  278  277     1/2       1/4        <NA>              <NA>
	278  279  278     1/2         0        <NA>              <NA>
	279  280  279     1/2       1/4        <NA>              <NA>
	280  281  280     1/2         0        <NA>              <NA>
	281  282  281     1/2       1/4        <NA>              <NA>
	282  283  282     1/2         0        <NA>              <NA>
	283  284  283     1/2       1/4        <NA>              <NA>
	284  285  284     1/2         0        <NA>              <NA>
	285  286  285     1/2       1/4        <NA>              <NA>
UNUSED_FINE_MARKER_WARNING (20, 19) ms3.Parse.schubert_dances.D735galopp01a -- /home/hentsche/miniconda3/envs/dimcat/lib/python3.10/site-packages/ms3/bs4_measures.py (line 266) __init__():
	Piece has a Fine but the last MC is missing a repeat sign or a D.C. (da capo) or D.S. (dal segno). Ignoring Fine.

Before: 1084 pieces; after removing those without cadence labels: 587

Show corpora containing pieces with cadence annotations

# Group the filtered pieces with dimcat's CorpusGrouper.
grouped_by_dataset = dc.CorpusGrouper().process_data(hascadence)
# `indices` maps each group key to the indices of its pieces; the first element
# of the key is used as the corpus name.
corpora = {group[0]: f"{len(ixs)} pieces" for group, ixs in  grouped_by_dataset.indices.items()}
print(f"{len(corpora)} corpora with {sum(map(len, grouped_by_dataset.indices.values()))} pieces containing cadence annotations:")
corpora
19 corpora with 587 pieces containing cadence annotations:
{'bach_solo': '32 pieces',
 'beethoven_piano_sonatas': '64 pieces',
 'c_schumann_lieder': '12 pieces',
 'chopin_mazurkas': '50 pieces',
 'corelli': '148 pieces',
 'debussy_suite_bergamasque': '4 pieces',
 'dvorak_silhouettes': '12 pieces',
 'grieg_lyrical_pieces': '65 pieces',
 'handel_keyboard': '6 pieces',
 'jc_bach_sonatas': '29 pieces',
 'liszt_pelerinage': '19 pieces',
 'mahler_kindertotenlieder': '5 pieces',
 'medtner_tales': '16 pieces',
 'pleyel_quartets': '6 pieces',
 'scarlatti_sonatas': '69 pieces',
 'schubert_dances': '16 pieces',
 'schumann_kinderszenen': '13 pieces',
 'tchaikovsky_seasons': '12 pieces',
 'wf_bach_sonatas': '9 pieces'}

All annotation labels from the selected pieces

# Fetch the 'expanded' facet (the annotation table) of the filtered pieces.
all_labels = hascadence.get_facet('expanded')

print(f"{len(all_labels.index)} hand-annotated harmony labels:")
# Preview the first ten rows from column position 14 onwards and highlight
# the non-missing cells of the 'chord' column.
all_labels.iloc[:10, 14:].style.apply(color_background, subset="chord")
95751 hand-annotated harmony labels:
      chord numeral form figbass changes relativeroot cadence phraseend chord_type globalkey_is_minor localkey_is_minor chord_tones added_tones root bass_note volta special pedalend placement
corpus fname interval                                      
bach_solo BWV1009_01_Prelude [0.0, 6.0) I I nan nan nan nan nan { M False False (0, 4, 1) () 0 0 nan nan nan
[6.0, 8.0) V V nan nan nan nan nan nan M False False (1, 5, 2) () 1 1 nan nan nan
[8.0, 9.0) V7 V nan 7 nan nan nan nan Mm7 False False (1, 5, 2, -1) () 1 1 nan nan nan
[9.0, 12.0) I I nan nan nan nan nan nan M False False (0, 4, 1) () 0 0 nan nan nan
[12.0, 14.0) V V nan nan nan nan nan nan M False False (1, 5, 2) () 1 1 nan nan nan
[14.0, 15.0) V7 V nan 7 nan nan nan nan Mm7 False False (1, 5, 2, -1) () 1 1 nan nan nan
[15.0, 20.0) I I nan nan nan nan nan } M False False (0, 4, 1) () 0 0 nan nan nan
[18.25, 18.25) nan nan nan nan nan nan nan { nan False False () () nan nan nan
[20.0, 21.0) ii/V ii nan nan nan V nan nan m False False (3, 0, 4) () 3 3 nan nan nan
[21.0, 23.0) V6/V V nan 6 nan V nan nan M False False (6, 3, 2) () 2 6 nan nan nan
# Renders another "Show Code"/"Hide Code" button (same snippet as at the top of
# the notebook): the script hides all `div.input` elements once the document is
# ready and toggles their visibility each time the button is pressed.
# NOTE(review): every copy of this snippet creates an input with the same id
# `toggleButton`; presumably only one of them gets its caption updated — confirm.
HTML('''<script>
  function code_toggle() {
    if (code_shown){
      $('div.input').hide('500');
      $('#toggleButton').val('Show Code')
    } else {
      $('div.input').show('500');
      $('#toggleButton').val('Hide Code')
    }
    code_shown = !code_shown
  }
  $( document ).ready(function(){
    code_shown=false;
    $('div.input').hide()
  });
</script>
<form action="javascript:code_toggle()"><input type="submit" id="toggleButton" value="Show Code"></form>''')

Metadata

# Metadata table of the underlying data, restricted to the filtered pieces.
# NOTE(review): `hascadence.indices[()]` presumably holds the piece indices of
# the single, ungrouped group — confirm against the dimcat documentation.
dataset_metadata = hascadence.data.metadata()
hascadence_metadata = dataset_metadata.loc[hascadence.indices[()]]
# Name the first index level 'dataset'; later cells group by that level name.
hascadence_metadata.index.rename('dataset', level=0, inplace=True)
hascadence_metadata.head()
TimeSig KeySig last_mc last_mn length_qb last_mc_unfolded last_mn_unfolded length_qb_unfolded volta_mcs all_notes_qb n_onsets n_onset_positions guitar_chord_count form_label_count label_count harmony_version annotated_key annotators composed_start composed_end composer workTitle movementNumber movementTitle workNumber poet lyricist arranger copyright creationDate mscVersion platform source translator musescore ms3_version title_text lyricist_text has_drumset ambitus subdirectory rel_path originalFormat staff_1_ambitus staff_1_instrument reviewers subtitle_text composer_text composed_source imslp musicbrainz viaf wikidata staff_2_ambitus staff_2_instrument score_integrity staff_3_ambitus staff_3_instrument imslp.1 key mode typesetter comments electronic editor electronic encoder staff_4_ambitus staff_4_instrument text pdf score integrity extension PDF Deutsch dance goldenberg_id
dataset fname
bach_solo BWV1009_01_Prelude 1: 3/4 1: 0 88 88 264.0 88.0 88.0 264.0 NaN 292.00 1007 970 0 0 113 2.3.0 C Adrian Nagel 1717 1723 Bach, J.S. Cello Suite No.3 in C major 1 NaN NaN NaN NaN NaN NaN 2021-02-21 NaN Apple Macintosh NaN NaN 3.6.2 1.2.2 NaN NaN False 36-67 (C2-G4) MS3 MS3/BWV1009_01_Prelude.mscx NaN 36-67 (C2-G4) Piano NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
BWV1009_02_Allemande 1: 4/4 1: 0 25 24 96.0 50.0 48.0 192.0 NaN 107.25 486 469 0 0 106 2.3.0 C Adrian Nagel 1717 1723 Bach, J.S. Cello Suite No.3 in C major 2 NaN NaN NaN NaN NaN NaN 2021-02-21 NaN Apple Macintosh NaN NaN 3.6.0 1.2.2 NaN NaN False 36-67 (C2-G4) MS3 MS3/BWV1009_02_Allemande.mscx NaN 36-67 (C2-G4) Piano NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
BWV1009_03_Courante 1: 6/8 1: 0 86 84 252.0 172.0 168.0 504.0 NaN 257.00 495 492 0 0 83 2.3.0 C Adrian Nagel 1717 1723 Bach, J.S. Cello Suite No.3 in C major 3 NaN NaN NaN NaN NaN NaN 2021-02-21 NaN Apple Macintosh NaN NaN 3.6.0 1.2.2 NaN NaN False 36-64 (C2-E4) MS3 MS3/BWV1009_03_Courante.mscx NaN 36-64 (C2-E4) Piano NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
BWV1009_04_Sarabande 1: 3/4 1: 0 24 24 72.0 48.0 48.0 144.0 NaN 122.00 217 171 0 0 55 2.3.0 C Adrian Nagel 1717 1723 Bach, J.S. Cello Suite No.3 in C major 4 NaN NaN NaN NaN NaN NaN 2021-03-08 NaN Apple Macintosh NaN NaN 3.6.0 1.2.2 NaN NaN False 36-67 (C2-G4) MS3 MS3/BWV1009_04_Sarabande.mscx NaN 36-67 (C2-G4) Piano NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
BWV1009_05_BourréeI 1: 2/2 1: 0 29 28 112.0 58.0 56.0 224.0 NaN 118.50 191 186 0 0 64 2.3.0 C Adrian Nagel 1717 1723 Bach, J.S. Cello Suite No.3 in C major 5 NaN NaN NaN NaN NaN NaN 2021-02-21 NaN Apple Macintosh NaN NaN 3.6.2 1.2.2 NaN NaN False 36-65 (C2-F4) MS3 MS3/BWV1009_05_BourréeI.mscx NaN 36-65 (C2-F4) Piano NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
# Number of pieces per dataset and `composed_end` value.
hascadence_metadata.groupby(level=0).composed_end.value_counts()
dataset                    composed_end
bach_solo                  1723            32
beethoven_piano_sonatas    1795            12
                           1798            10
                           1802            10
                           1797             8
                           1822             5
                           1799             3
                           1804             3
                           1805             3
                           1810             3
                           1820             3
                           1796             2
                           1809             2
c_schumann_lieder          1844             6
                           1853             6
chopin_mazurkas            1832             9
                           1833             8
                           1838             5
                           1846             4
                           1837             3
                           1842             3
                           1844             3
                           1845             3
                           1826             2
                           1830             2
                           1835             2
                           1827             1
                           1834             1
                           1839             1
                           1840             1
                           1841             1
                           1849             1
corelli                    1689            50
                           1694            50
                           1681            48
debussy_suite_bergamasque  1905             4
dvorak_silhouettes         1879            12
grieg_lyrical_pieces       1867             8
                           1883             7
                           1901             7
                           1886             6
                           1888             6
                           1891             6
                           1893             6
                           1895             6
                           1896             6
                           1899             6
                           1878             1
handel_keyboard            1720             6
jc_bach_sonatas            1765            16
                           1780            13
liszt_pelerinage           1855             9
                           1858             7
                           1861             3
mahler_kindertotenlieder   1904             5
medtner_tales              1917             8
                           1912             4
                           1924             2
                           1905             1
                           1907             1
pleyel_quartets            1783             6
scarlatti_sonatas          1739            32
                           1742            30
                           1746             4
                           1749             3
schubert_dances            1820            14
                           1823             1
                           1825             1
schumann_kinderszenen      1839            13
tchaikovsky_seasons        1876            12
wf_bach_sonatas            1760             9
Name: composed_end, dtype: int64
# Mean `composed_end` per dataset, converted to an integer year and sorted in
# ascending order; the resulting index order is the chronological order of the datasets.
mean_composition_years = hascadence_metadata.groupby(level=0).composed_end.mean().astype(int).sort_values()
chronological_order = mean_composition_years.index.to_list()
# One row per dataset holding its mean year and its number of pieces.
bar_data = pd.concat([mean_composition_years.rename('year'),
                      hascadence_metadata.groupby(level='dataset').size().rename('pieces')],
                     axis=1
                    ).reset_index()
fig = px.bar(bar_data, x='year', y='pieces', color='dataset', title='Pieces contained in the dataset')
# Set an explicit bar width of 5 (presumably in x-axis units, i.e. years — confirm).
fig.update_traces(width=5)

Keys

Computing extent of key segments from annotations

In the following, major and minor keys are distinguished by the boolean `localkey_is_minor` (`False` for major, `True` for minor).

# Run the filtered pieces through a pipeline of dimcat's LocalKeySlicer and
# ModeGrouper, then retrieve one row of information per resulting slice.
segmented_by_keys = dc.Pipeline([
                         dc.LocalKeySlicer(),
                         dc.ModeGrouper()])\
                        .process_data(hascadence)
key_segments = segmented_by_keys.get_slice_info()
# The durations arrive with dtype `object` (see the printed dtype), so they
# are converted to a numeric dtype before being aggregated below.
print(key_segments.duration_qb.dtype)
key_segments.duration_qb = pd.to_numeric(key_segments.duration_qb)
object
# Preview the first 15 key segments from column position 11 onwards. Missing
# values are replaced by '' first, so every cell of the 'localkey' column
# counts as present and gets highlighted.
key_segments.iloc[:15, 11:].fillna('').style.apply(color_background, subset="localkey")
        globalkey localkey pedal chord numeral form figbass changes relativeroot cadence phraseend chord_type globalkey_is_minor localkey_is_minor chord_tones added_tones root bass_note special volta pedalend placement
localkey_is_minor corpus fname localkey_slice                                            
False bach_solo BWV1009_01_Prelude [0.0, 264.0) C I I I { M False False (0, 4, 1) () 0 0
BWV1009_02_Allemande [0.0, 16.75) C I V V { M False False (1, 5, 2) () 1 1
[16.75, 52.75) C V V6 V 6 M False False (5, 2, 1) () 1 5
[68.75, 96.0) C I V7/IV V 7 IV Mm7 False False (0, 4, 1, -2) () 0 0
BWV1009_03_Courante [0.0, 24.5) C I I I { M False False (0, 4, 1) () 0 0
[24.5, 123.5) C V I6 I 6 M False False (4, 1, 0) () 0 4
[168.5, 252.0) C I V7/IV V 7 IV Mm7 False False (0, 4, 1, -2) () 0 0
BWV1009_04_Sarabande [0.0, 13.0) C I I I { M False False (0, 4, 1) () 0 0
[13.0, 27.0) C V ii7 ii 7 mm7 False False (2, -1, 3, 0) () 2 2
[48.0, 60.0) C V viio64 vii o 64 { o False False (-1, 5, 2) () 5 -1
[60.0, 72.0) C I viio64/IV vii o 64 IV o False False (-2, 4, 1) () 4 -2
BWV1009_05_BourréeI [0.0, 17.0) C I I6 I 6 { M False False (4, 1, 0) () 0 4
[17.0, 40.0) C V ii ii m False False (2, -1, 3) () 2 2
[66.0, 112.0) C I V/V V V M False False (2, 6, 3) () 2 2
BWV1009_06_BourréeII [17.0, 41.0) c III I I { M True False (0, 4, 1) () 0 0

Ratio between major and minor key segments by aggregated durations

Overall

# Total duration of all key segments per mode (index level `localkey_is_minor`).
maj_min_ratio = key_segments.groupby(level="localkey_is_minor").duration_qb.sum().to_frame()
# Share of each mode in the overall duration, in percent, rounded to one decimal.
maj_min_ratio['fraction'] = (100.0 * maj_min_ratio.duration_qb / maj_min_ratio.duration_qb.sum()).round(1)
maj_min_ratio
duration_qb fraction
localkey_is_minor
False 87978.0 61.3
True 55521.0 38.7

By dataset

# Summed segment durations per corpus and mode, rounded to two decimals.
segment_duration_per_dataset = key_segments.groupby(level=["corpus", "localkey_is_minor"]).duration_qb.sum().round(2)
# Durations as percentages of the respective corpus total.
norm_segment_duration_per_dataset = 100 * segment_duration_per_dataset / segment_duration_per_dataset.groupby(level="corpus").sum()
# Absolute durations side by side with the percentages formatted as strings such as '59.5 %'.
maj_min_ratio_per_dataset = pd.concat([segment_duration_per_dataset,
                                      norm_segment_duration_per_dataset.rename('fraction').round(1).astype(str)+" %"],
                                     axis=1)
# NOTE(review): this rebinds `segment_duration_per_dataset` from the rounded
# Series used above to an unrounded DataFrame with a flat index.
segment_duration_per_dataset = key_segments.groupby(level=["corpus", "localkey_is_minor"]).duration_qb.sum().reset_index()
maj_min_ratio_per_dataset.reset_index()
corpus localkey_is_minor duration_qb fraction
0 bach_solo False 3340.00 59.5 %
1 bach_solo True 2277.25 40.5 %
2 beethoven_piano_sonatas False 23659.75 66.3 %
3 beethoven_piano_sonatas True 12003.38 33.7 %
4 c_schumann_lieder False 1296.00 88.9 %
5 c_schumann_lieder True 162.50 11.1 %
6 chopin_mazurkas False 7845.50 57.2 %
7 chopin_mazurkas True 5881.75 42.8 %
8 corelli False 9607.00 53.4 %
9 corelli True 8387.00 46.6 %
10 debussy_suite_bergamasque False 584.00 36.1 %
11 debussy_suite_bergamasque True 1032.00 63.9 %
12 dvorak_silhouettes False 1239.50 66.9 %
13 dvorak_silhouettes True 613.00 33.1 %
14 grieg_lyrical_pieces False 9900.17 60.4 %
15 grieg_lyrical_pieces True 6504.33 39.6 %
16 handel_keyboard False 218.00 100.0 %
17 jc_bach_sonatas False 6653.00 80.0 %
18 jc_bach_sonatas True 1659.50 20.0 %
19 liszt_pelerinage False 6833.42 70.4 %
20 liszt_pelerinage True 2868.46 29.6 %
21 mahler_kindertotenlieder False 665.00 36.3 %
22 mahler_kindertotenlieder True 1165.00 63.7 %
23 medtner_tales False 1598.67 34.7 %
24 medtner_tales True 3011.33 65.3 %
25 pleyel_quartets False 1713.00 56.5 %
26 pleyel_quartets True 1321.50 43.5 %
27 scarlatti_sonatas False 7280.38 53.4 %
28 scarlatti_sonatas True 6352.50 46.6 %
29 schubert_dances False 1038.00 96.2 %
30 schubert_dances True 41.00 3.8 %
31 schumann_kinderszenen False 700.00 74.9 %
32 schumann_kinderszenen True 234.00 25.1 %
33 tchaikovsky_seasons False 2387.00 60.9 %
34 tchaikovsky_seasons True 1532.00 39.1 %
35 wf_bach_sonatas False 1419.62 74.9 %
36 wf_bach_sonatas True 474.50 25.1 %
# Datasets sorted by their mean `composed_end` year, earliest first.
chronological_order
['corelli',
 'handel_keyboard',
 'bach_solo',
 'scarlatti_sonatas',
 'wf_bach_sonatas',
 'jc_bach_sonatas',
 'pleyel_quartets',
 'beethoven_piano_sonatas',
 'schubert_dances',
 'chopin_mazurkas',
 'schumann_kinderszenen',
 'c_schumann_lieder',
 'liszt_pelerinage',
 'tchaikovsky_seasons',
 'dvorak_silhouettes',
 'grieg_lyrical_pieces',
 'mahler_kindertotenlieder',
 'debussy_suite_bergamasque',
 'medtner_tales']
# Stacked bars of the aggregated key-segment durations per corpus, split by
# mode and annotated with the percentage strings of the 'fraction' column.
# The plotted column is called 'corpus', so `labels` and `category_orders`
# have to be keyed by 'corpus' as well: keys that match no column have no
# effect, which left the axis title in place and the bars not arranged in
# chronological order.
fig = px.bar(maj_min_ratio_per_dataset.reset_index(),
       x="corpus",
       y="duration_qb",
       color="localkey_is_minor",
       text='fraction',
       labels=dict(corpus='', duration_qb="aggregated duration in quarter notes"),
       category_orders=dict(corpus=chronological_order)
    )
# Apply the notebook-wide layout; the returned figure is the cell's output.
fig.update_layout(**STD_LAYOUT)

Annotation table sliced by key segments

# The 'expanded' facet (annotation table) as returned for the data that has
# been sliced into key segments and grouped by mode.
annotations_by_keys = segmented_by_keys.get_facet("expanded")
annotations_by_keys
mc mn quarterbeats duration_qb mc_onset mn_onset timesig staff voice label alt_label globalkey localkey pedal chord numeral form figbass changes relativeroot cadence phraseend chord_type globalkey_is_minor localkey_is_minor chord_tones added_tones root bass_note special volta pedalend placement
localkey_is_minor corpus fname localkey_slice interval
False bach_solo BWV1009_01_Prelude [0.0, 264.0) [0.0, 6.0) 1 1 0 6.0 0 0 3/4 1 1 C.I{ NaN C I NaN I I NaN NaN NaN NaN NaN { M False False (0, 4, 1) () 0 0 NaN <NA> NaN NaN
[6.0, 8.0) 3 3 6 2.0 0 0 3/4 1 1 V NaN C I NaN V V NaN NaN NaN NaN NaN NaN M False False (1, 5, 2) () 1 1 NaN <NA> NaN NaN
[8.0, 9.0) 3 3 8 1.0 1/2 1/2 3/4 1 1 V7 NaN C I NaN V7 V NaN 7 NaN NaN NaN NaN Mm7 False False (1, 5, 2, -1) () 1 1 NaN <NA> NaN NaN
[9.0, 12.0) 4 4 9 3.0 0 0 3/4 1 1 I NaN C I NaN I I NaN NaN NaN NaN NaN NaN M False False (0, 4, 1) () 0 0 NaN <NA> NaN NaN
[12.0, 14.0) 5 5 12 2.0 0 0 3/4 1 1 V NaN C I NaN V V NaN NaN NaN NaN NaN NaN M False False (1, 5, 2) () 1 1 NaN <NA> NaN NaN
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
True wf_bach_sonatas F003_n04c [266.0, 335.0) [327.0, 327.0) 84 82 327 0.0 1/2 1/2 2/2 2 1 { NaN D vi NaN NaN NaN NaN NaN NaN NaN NaN { NaN False True () () <NA> <NA> NaN <NA> NaN NaN
[330.0, 331.0) 85 83 330 1.0 1/4 1/4 2/2 2 1 i6 NaN D vi NaN i6 i NaN 6 NaN NaN NaN NaN m False True (-3, 1, 0) () 0 -3 NaN <NA> NaN NaN
[331.0, 332.0) 85 83 331 1.0 1/2 1/2 2/2 2 1 iv NaN D vi NaN iv iv NaN NaN NaN NaN NaN NaN m False True (-1, -4, 0) () -1 -1 NaN <NA> NaN NaN
[332.0, 333.0) 85 83 332 1.0 3/4 3/4 2/2 2 1 V NaN D vi NaN V V NaN NaN NaN NaN NaN NaN M False True (1, 5, 2) () 1 1 NaN <NA> NaN NaN
[333.0, 335.0) 86 84 333 2.0 0 0 2/2 2 1 i|IAC}{ NaN D vi NaN i i NaN NaN NaN NaN IAC }{ m False True (0, -3, 1) () 0 0 NaN <NA> NaN NaN

95750 rows × 33 columns

# Renders another "Show Code"/"Hide Code" button (same snippet as at the top of
# the notebook): the script hides all `div.input` elements once the document is
# ready and toggles their visibility each time the button is pressed.
# NOTE(review): every copy of this snippet creates an input with the same id
# `toggleButton`; presumably only one of them gets its caption updated — confirm.
HTML('''<script>
  function code_toggle() {
    if (code_shown){
      $('div.input').hide('500');
      $('#toggleButton').val('Show Code')
    } else {
      $('div.input').show('500');
      $('#toggleButton').val('Hide Code')
    }
    code_shown = !code_shown
  }
  $( document ).ready(function(){
    code_shown=false;
    $('div.input').hide()
  });
</script>
<form action="javascript:code_toggle()"><input type="submit" id="toggleButton" value="Show Code"></form>''')

Phrases

Overview

Presence of phrase annotation symbols per dataset:

# Count how often each value of the `phraseend` column occurs per corpus.
all_labels.groupby(["corpus"]).phraseend.value_counts()
corpus                     phraseend
bach_solo                  }            172
                           {            170
                           }{            24
beethoven_piano_sonatas    }            925
                           {            918
                           }{           424
c_schumann_lieder          {             55
                           }             55
                           }{            34
chopin_mazurkas            }            505
                           {            498
                           }{            49
corelli                    }            705
                           {            702
                           }{           379
debussy_suite_bergamasque  {             15
                           }             15
                           }{            10
dvorak_silhouettes         {             93
                           }             92
                           }{            77
grieg_lyrical_pieces       }            518
                           {            514
                           }{            33
handel_keyboard            {             25
                           }             25
                           }{             1
jc_bach_sonatas            }            297
                           {            293
                           }{           144
                           \\             5
liszt_pelerinage           }            208
                           {            205
                           }{            68
mahler_kindertotenlieder   }             19
                           {             18
                           }{            11
medtner_tales              {            150
                           }            150
                           }{            56
pleyel_quartets            }             86
                           {             85
                           }{            40
scarlatti_sonatas          }            566
                           {            563
                           }{           426
schubert_dances            {             71
                           }             71
schumann_kinderszenen      }             83
                           {             79
                           }{             2
tchaikovsky_seasons        {            288
                           }            288
                           }{            10
wf_bach_sonatas            }             88
                           {             87
                           }{            71
                           \\             3
Name: phraseend, dtype: int64

Presence of legacy phrase endings

# Select the labels whose phrase annotation is the legacy symbol `\\`
# and display them with the `label` column styled by `color_background`.
all_labels.loc[all_labels["phraseend"] == r'\\'].style.apply(
    color_background,
    subset="label",
)
      mc mn quarterbeats duration_qb mc_onset mn_onset timesig staff voice label alt_label globalkey localkey pedal chord numeral form figbass changes relativeroot cadence phraseend chord_type globalkey_is_minor localkey_is_minor chord_tones added_tones root bass_note volta special pedalend placement
corpus fname interval                                                                  
jc_bach_sonatas wa01op05no1a_Allegretto [124.0, 124.0) 64 62 124 0.000000 0 0 2/4 2 1 \\ nan Bb I nan nan nan nan nan nan nan nan \\ nan False False () () nan nan nan
wa04op05no4a_Allegro [168.0, 172.0) 43 43 168 4.000000 0 0 4/4 2 1 I]\\ nan Eb V I I I nan nan nan nan nan \\ M False False (0, 4, 1) () 0 0 nan nan nan
[464.0, 468.0) 117 117 464 4.000000 0 0 4/4 2 1 I]\\ nan Eb I I I I nan nan nan nan nan \\ M False False (0, 4, 1) () 0 0 nan nan nan
wa05op05no5b_Adagio [10.0, 11.0) 4 4 10 1.000000 1/4 1/4 3/4 2 1 I\\ nan A I nan I I nan nan nan nan nan \\ M False False (0, 4, 1) () 0 0 nan nan nan
[118.0, 119.0) 40 40 118 1.000000 1/4 1/4 3/4 2 1 I\\ nan A I nan I I nan nan nan nan nan \\ M False False (0, 4, 1) () 0 0 nan nan nan
wf_bach_sonatas F003_n04a [27.5, 28.5) 15 14 55/2 1.000000 1/4 1/4 2/4 2 1 V6\\ nan D V nan V6 V nan 6 nan nan nan \\ M False False (5, 2, 1) () 1 5 nan nan nan
[113.5, 114.0) 59 57 227/2 0.500000 1/4 1/4 2/4 2 1 vi\\ nan D V nan vi vi nan nan nan nan nan \\ m False False (3, 0, 4) () 3 3 nan nan nan
[151.0, 151.5) 78 76 151 0.500000 1/8 1/8 2/4 2 1 vi\\ nan D I nan vi vi nan nan nan nan nan \\ m False False (3, 0, 4) () 3 3 nan nan nan

A table with the extents of all annotated phrases

Relevant columns:

  • quarterbeats: start position for each phrase

  • duration_qb: duration of each phrase, measured in quarter notes

  • phrase_slice: time interval of each annotated phrase (for segmenting chord progressions and notes)

# segmented = PhraseSlicer().process_data(hascadence)
# Slice the grouped data with dimcat's PhraseSlicer, then fetch the
# slice info table (one row per phrase, see the count printed below).
segmented = dc.PhraseSlicer().process_data(grouped_by_dataset)
phrases = segmented.get_slice_info()
print(f"Overall number of phrases is {len(phrases.index)}")
phrases.head(10).style.apply(
    color_background,
    subset=["quarterbeats", "duration_qb"],
)
Overall number of phrases is 6688
      mc mn quarterbeats duration_qb mc_onset mn_onset timesig staff voice label alt_label globalkey localkey pedal chord numeral form figbass changes relativeroot cadence phraseend chord_type globalkey_is_minor localkey_is_minor chord_tones added_tones root bass_note volta special pedalend placement
corpus fname phrase_slice                                                                  
bach_solo BWV1009_01_Prelude [0.0, 18.25) 1 1 0 18.250000 0 0 3/4 1 1 C.I{ nan C I nan I I nan nan nan nan nan { M False False (0, 4, 1) () 0 0 nan nan nan nan
[18.25, 36.25) 7 7 73/4 18.000000 1/16 1/16 3/4 1 1 { nan C I nan nan nan nan nan nan nan nan { nan False False () () nan nan nan nan
[36.25, 78.0) 13 13 145/4 41.750000 1/16 1/16 3/4 1 1 { nan C I nan nan nan nan nan nan nan nan { nan False False () () nan nan nan nan
[78.0, 108.0) 27 27 78 30.000000 0 0 3/4 1 1 vi}{ nan C I nan vi vi nan nan nan nan nan }{ m False False (3, 0, 4) () 3 3 nan nan nan nan
[108.0, 180.25) 37 37 108 72.250000 0 0 3/4 1 1 I}{ nan C I nan I I nan nan nan nan nan }{ M False False (0, 4, 1) () 0 0 nan nan nan nan
[180.25, 210.25) 61 61 721/4 30.000000 1/16 1/16 3/4 1 1 { nan C I nan nan nan nan nan nan nan nan { nan False False () () nan nan nan nan
[210.25, 244.25) 71 71 841/4 34.000000 1/16 1/16 3/4 1 1 { nan C I nan nan nan nan nan nan nan nan { nan False False () () nan nan nan nan
[244.25, 264.0) 82 82 977/4 19.750000 5/16 5/16 3/4 1 1 { nan C I I nan nan nan nan nan nan nan { nan False False () () nan nan nan nan
BWV1009_02_Allemande [0.0, 15.75) 1 0 0 15.750000 0 13/16 4/4 1 1 C.V{ nan C I nan V V nan nan nan nan nan { M False False (1, 5, 2) () 1 1 nan nan nan nan
[15.75, 48.0) 5 4 63/4 32.250000 3/4 3/4 4/4 1 1 { nan C I nan nan nan nan nan nan nan nan { nan False False () () nan nan nan nan
# Report the dtype before conversion, then make the phrase durations numeric.
print(phrases["duration_qb"].dtype)
phrases["duration_qb"] = pd.to_numeric(phrases["duration_qb"])
object

Annotation table sliced by phrase annotations

ToDo: Example for overlap / phrase beginning without new chord

# Retrieve the "expanded" facet of the phrase-sliced data and preview it.
phrase_segments = segmented.get_facet("expanded")
phrase_segments.iloc[:10]
mc mn quarterbeats duration_qb mc_onset mn_onset timesig staff voice label alt_label globalkey localkey pedal chord numeral form figbass changes relativeroot cadence phraseend chord_type globalkey_is_minor localkey_is_minor chord_tones added_tones root bass_note volta special pedalend placement
corpus fname phrase_slice interval
bach_solo BWV1009_01_Prelude [0.0, 18.25) [0.0, 0.0) 1 1 0.0 0.00 0 0 3/4 1 1 C.I{ NaN C I NaN <NA> <NA> <NA> <NA> <NA> NaN NaN { <NA> False False <NA> <NA> <NA> <NA> <NA> NaN NaN NaN
[0.0, 6.0) 1 1 0.0 6.00 0 0 3/4 1 1 C.I{ NaN C I NaN I I NaN NaN NaN NaN <NA> <NA> M False False (0, 4, 1) () 0 0 <NA> NaN NaN NaN
[6.0, 8.0) 3 3 6.0 2.00 0 0 3/4 1 1 V NaN C I NaN V V NaN NaN NaN NaN NaN NaN M False False (1, 5, 2) () 1 1 <NA> NaN NaN NaN
[8.0, 9.0) 3 3 8.0 1.00 1/2 1/2 3/4 1 1 V7 NaN C I NaN V7 V NaN 7 NaN NaN NaN NaN Mm7 False False (1, 5, 2, -1) () 1 1 <NA> NaN NaN NaN
[9.0, 12.0) 4 4 9.0 3.00 0 0 3/4 1 1 I NaN C I NaN I I NaN NaN NaN NaN NaN NaN M False False (0, 4, 1) () 0 0 <NA> NaN NaN NaN
[12.0, 14.0) 5 5 12.0 2.00 0 0 3/4 1 1 V NaN C I NaN V V NaN NaN NaN NaN NaN NaN M False False (1, 5, 2) () 1 1 <NA> NaN NaN NaN
[14.0, 15.0) 5 5 14.0 1.00 1/2 1/2 3/4 1 1 V7 NaN C I NaN V7 V NaN 7 NaN NaN NaN NaN Mm7 False False (1, 5, 2, -1) () 1 1 <NA> NaN NaN NaN
[15.0, 15.0) 6 6 15.0 0.00 0 0 3/4 1 1 I} NaN C I NaN <NA> <NA> <NA> <NA> <NA> NaN NaN } <NA> False False <NA> <NA> <NA> <NA> <NA> NaN NaN NaN
[15.0, 18.25) 6 6 15.0 3.25 0 0 3/4 1 1 I} NaN C I NaN I I NaN NaN NaN NaN <NA> <NA> M False False (0, 4, 1) () 0 0 <NA> NaN NaN NaN
[18.25, 36.25) [18.25, 18.25) 7 7 18.25 0.00 1/16 1/16 3/4 1 1 { NaN C I NaN NaN NaN NaN NaN NaN NaN NaN { NaN False False () () <NA> <NA> <NA> NaN NaN NaN
# Report the dtype, then make sure the segment durations are numeric.
print(phrase_segments["duration_qb"].dtype)
phrase_segments["duration_qb"] = pd.to_numeric(phrase_segments["duration_qb"])
float64

Distribution of phrase lengths

Histogram summarizing the lengths of all phrases measured in quarter notes

# Tally how often each phrase duration occurs; the durations go on the x axis
# and their counts are passed as y, binned in steps of one quarter note.
phrase_durations = phrases["duration_qb"].value_counts()
histogram = px.histogram(
    x=phrase_durations.index,
    y=phrase_durations,
    labels=dict(
        x='phrase lengths binned to a quarter note',
        y='#phrases within length bin',
    ),
)
histogram.update_traces(
    xbins=dict(  # bins used for histogram
        #start=0.0,
        end=100.0,
        size=1,
    )
)
histogram.update_xaxes(dtick=4)
histogram.show()

Bar plot showing approximate phrase length in measures

Computed simply by subtracting, for the span of every phrase, the first measure number from the last.

# Group the segments by the first three index levels (one group per phrase)
# and take the span between the lowest and highest measure number in each.
phrase_gpb = phrase_segments.groupby(level=[0,1,2])
phrase_length_in_measures = phrase_gpb["mn"].max() - phrase_gpb["mn"].min()
measure_length_counts = phrase_length_in_measures.value_counts()
fig = px.bar(
    x=measure_length_counts.index,
    y=measure_length_counts,
    labels=dict(
        x="approximative size of all phrases (difference between end and start measure number)",
        y="#phrases",
    ),
)
fig.update_xaxes(dtick=4)

Histogram summarizing phrase lengths by precise length expressed in measures

In order to divide the phrase length by the length of a measure, the phrases containing more than one time signature are filtered out.

Durations computed by dividing the duration by the measure length

# Distinct time signatures occurring within each phrase, and how many there are.
phrase2timesigs = phrase_gpb["timesig"].unique()
n_timesignatures_per_phrase = phrase2timesigs.map(len)
# Keep the phrases with exactly one time signature and unpack that single value.
uniform_timesigs = phrase2timesigs[n_timesignatures_per_phrase == 1].map(lambda timesigs: timesigs[0])
more_than_one = n_timesignatures_per_phrase > 1
print(f"Filtered out the {more_than_one.sum()} phrases incorporating more than one time signature.")
n_timesigs = n_timesignatures_per_phrase.value_counts()
display(
    n_timesigs
    .reset_index()
    .rename(columns={'index': '#time signatures', 'timesig': '#phrases'})
)
uniform_timesig_phrases = phrases.loc[uniform_timesigs.index]
# A time signature parsed as a fraction (of a whole note), times 4, is the
# measure length in quarter notes.
timesig_in_quarterbeats = uniform_timesigs.map(Fraction) * 4
exact_measure_lengths = uniform_timesig_phrases["duration_qb"] / timesig_in_quarterbeats
# From here on `uniform_timesigs` is the phrase table with `duration_measures` prepended.
uniform_timesigs = pd.concat(
    [
        exact_measure_lengths.rename('duration_measures'),
        uniform_timesig_phrases,
    ],
    axis=1,
)
uniform_timesigs.to_csv('cadence_datasets_uniform_timesigs.tsv.zip', sep='\t')
fig = px.histogram(
    uniform_timesigs,
    x='duration_measures',
    labels=dict(duration_measures='phrase length in measures, factoring in time signatures'),
)
fig.update_traces(
    xbins=dict(  # bins used for histogram
        #start=0.0,
        #end=100.0,
        size=1,
    )
)
fig.update_xaxes(dtick=4)
Filtered out the 69 phrases incorporating more than one time signature.
#time signatures #phrases
0 1 6619
1 2 68
2 3 1
# Preview the first phrases, styling the newly added `duration_measures` column.
uniform_timesigs.head(10).style.apply(
    color_background,
    subset='duration_measures',
)
      duration_measures mc mn quarterbeats duration_qb mc_onset mn_onset timesig staff voice label alt_label globalkey localkey pedal chord numeral form figbass changes relativeroot cadence phraseend chord_type globalkey_is_minor localkey_is_minor chord_tones added_tones root bass_note volta special pedalend placement
corpus fname phrase_slice                                                                    
bach_solo BWV1009_01_Prelude [0.0, 18.25) 6.083333 1 1 0 18.250000 0 0 3/4 1 1 C.I{ nan C I nan I I nan nan nan nan nan { M False False (0, 4, 1) () 0 0 nan nan nan nan
[18.25, 36.25) 6.000000 7 7 73/4 18.000000 1/16 1/16 3/4 1 1 { nan C I nan nan nan nan nan nan nan nan { nan False False () () nan nan nan nan
[36.25, 78.0) 13.916667 13 13 145/4 41.750000 1/16 1/16 3/4 1 1 { nan C I nan nan nan nan nan nan nan nan { nan False False () () nan nan nan nan
[78.0, 108.0) 10.000000 27 27 78 30.000000 0 0 3/4 1 1 vi}{ nan C I nan vi vi nan nan nan nan nan }{ m False False (3, 0, 4) () 3 3 nan nan nan nan
[108.0, 180.25) 24.083333 37 37 108 72.250000 0 0 3/4 1 1 I}{ nan C I nan I I nan nan nan nan nan }{ M False False (0, 4, 1) () 0 0 nan nan nan nan
[180.25, 210.25) 10.000000 61 61 721/4 30.000000 1/16 1/16 3/4 1 1 { nan C I nan nan nan nan nan nan nan nan { nan False False () () nan nan nan nan
[210.25, 244.25) 11.333333 71 71 841/4 34.000000 1/16 1/16 3/4 1 1 { nan C I nan nan nan nan nan nan nan nan { nan False False () () nan nan nan nan
[244.25, 264.0) 6.583333 82 82 977/4 19.750000 5/16 5/16 3/4 1 1 { nan C I I nan nan nan nan nan nan nan { nan False False () () nan nan nan nan
BWV1009_02_Allemande [0.0, 15.75) 3.937500 1 0 0 15.750000 0 13/16 4/4 1 1 C.V{ nan C I nan V V nan nan nan nan nan { M False False (1, 5, 2) () 1 1 nan nan nan nan
[15.75, 48.0) 8.062500 5 4 63/4 32.250000 3/4 3/4 4/4 1 1 { nan C I nan nan nan nan nan nan nan nan { nan False False () () nan nan nan nan

Inspecting long phrases

# Number of phrases per time signature, shown as a bar chart.
timsig_counts = uniform_timesigs["timesig"].value_counts()
fig = px.bar(timsig_counts, labels=dict(index="time signature", value="#phrases"))
fig.update_layout(**STD_LAYOUT)
fig.update_yaxes(gridcolor='lightgrey')
# Time signatures with fewer phrases than this threshold are excluded from the facets.
filter_counts_smaller_than = 5
filtered_timesigs = timsig_counts[timsig_counts < filter_counts_smaller_than].index.to_list()
fig = px.histogram(
    uniform_timesigs[~uniform_timesigs["timesig"].isin(filtered_timesigs)],
    x='duration_measures',
    facet_col='timesig',
    facet_col_wrap=2,
    height=1500,
)
# Give every facet its own axes and tick labels.
fig.update_xaxes(matches=None, showticklabels=True, visible=True, dtick=4)
fig.update_yaxes(matches=None, showticklabels=True, visible=True)
fig.update_traces(
    xbins=dict(  # bins used for histogram
        #start=0.0,
        end=50.0,
        size=1,
    )
)
# List all phrases spanning at least this many measures, one table per time signature.
see_greater_equal = 33
longest_measure_length = uniform_timesigs.loc[
    uniform_timesigs["duration_measures"] >= see_greater_equal,
    ["duration_measures", "timesig"],
]
for timesig, long_phrases in longest_measure_length.groupby('timesig'):
    L = len(long_phrases)
    if L > 1:
        plural = 's'
    else:
        plural = ''
    display(HTML(f"<h3>{L} long phrase{plural} in {timesig} meter:</h3>"))
    display(long_phrases.sort_values('duration_measures'))

1 long phrase in 12/8 meter:

duration_measures timesig
corpus fname phrase_slice
bach_solo BWV1012_01_Prelude [306.0, 594.0) 48.0 12/8

10 long phrases in 2/2 meter:

duration_measures timesig
corpus fname phrase_slice
grieg_lyrical_pieces op57n02 [64.0, 196.0) 33.0 2/2
[260.0, 392.0) 33.0 2/2
beethoven_piano_sonatas 26-1 [800.0, 936.0) 34.0 2/2
pleyel_quartets b309op2n3b [500.0, 640.0) 35.0 2/2
beethoven_piano_sonatas 09-3 [186.0, 332.0) 36.5 2/2
08-1 [200.0, 352.0) 38.0 2/2
21-3 [804.0, 956.0) 38.0 2/2
26-1 [244.0, 404.0) 40.0 2/2
21-3 [956.0, 1132.0) 44.0 2/2
08-1 [576.0, 776.0) 50.0 2/2

13 long phrases in 2/4 meter:

duration_measures timesig
corpus fname phrase_slice
liszt_pelerinage 160.09_Les_Cloches_de_Geneve_(Nocturne) [202.5, 270.0) 33.75 2/4
grieg_lyrical_pieces op68n03 [0.0, 68.0) 34.0 2/4
op38n01 [100.0, 172.0) 36.0 2/4
op68n04 [16.0, 92.0) 38.0 2/4
[92.0, 168.0) 38.0 2/4
beethoven_piano_sonatas 06-1 [152.5, 234.0) 40.75 2/4
grieg_lyrical_pieces op54n06 [36.0, 120.0) 42.0 2/4
liszt_pelerinage 162.03_Tarantella_da_Guillaume_Louis_Cottrau._Presto_e_canzone_napolitana [1025.5416666666667, 1109.875) 42.166667 2/4
[885.875, 970.875) 42.5 2/4
beethoven_piano_sonatas 02-1 [360.0, 448.0) 44.0 2/4
21-3 [476.0, 572.0) 48.0 2/4
liszt_pelerinage 160.09_Les_Cloches_de_Geneve_(Nocturne) [302.0, 401.25) 49.625 2/4
beethoven_piano_sonatas 23-3 [314.0, 422.0) 54.0 2/4

2 long phrases in 3/4 meter:

duration_measures timesig
corpus fname phrase_slice
chopin_mazurkas BI153-1op56-1 [306.0, 426.0) 40.0 3/4
debussy_suite_bergamasque l075-02_suite_menuet [147.0, 312.0) 55.0 3/4

2 long phrases in 3/8 meter:

duration_measures timesig
corpus fname phrase_slice
beethoven_piano_sonatas 17-3 [258.75, 321.0) 41.5 3/8
[140.75, 225.75) 56.666667 3/8

8 long phrases in 4/4 meter:

duration_measures timesig
corpus fname phrase_slice
liszt_pelerinage 160.06_Vallee_dObermann [165.0, 297.0) 33.0 4/4
mahler_kindertotenlieder kindertotenlieder_01_nun_will_die_sonn [144.0, 308.0) 41.0 4/4
beethoven_piano_sonatas 32-1 [364.125, 536.125) 43.0 4/4
21-1 [444.0, 620.0) 44.0 4/4
debussy_suite_bergamasque l075-04_suite_passepied [152.0, 332.0) 45.0 4/4
liszt_pelerinage 161.07_Apres_une_lecture_du_Dante [909.0, 1101.0) 48.0 4/4
160.06_Vallee_dObermann [515.5, 711.25) 48.9375 4/4
mahler_kindertotenlieder kindertotenlieder_05_in_diesem_wetter [236.0, 495.0) 64.75 4/4

10 long phrases in 6/8 meter:

duration_measures timesig
corpus fname phrase_slice
beethoven_piano_sonatas 18-4 [83.5, 190.0) 35.5 6/8
[272.5, 380.5) 36.0 6/8
liszt_pelerinage 162.03_Tarantella_da_Guillaume_Louis_Cottrau._Presto_e_canzone_napolitana [0.0, 111.0) 37.0 6/8
grieg_lyrical_pieces op62n05 [74.5, 186.0) 37.166667 6/8
beethoven_piano_sonatas 03-4 [774.5, 891.5) 39.0 6/8
18-4 [596.5, 715.0) 39.5 6/8
liszt_pelerinage 160.09_Les_Cloches_de_Geneve_(Nocturne) [0.0, 123.0) 41.0 6/8
beethoven_piano_sonatas 31-3 [229.5, 354.0) 41.5 6/8
18-4 [380.5, 512.5) 44.0 6/8
liszt_pelerinage 162.01_Gondoliera [243.5, 385.625) 47.375 6/8

Local keys

# For every phrase, collect its distinct local keys as a tuple and count them,
# then put both in front of the phrase table.
local_keys_per_phrase = phrase_gpb["localkey"].unique().map(tuple)
n_local_keys_per_phrase = local_keys_per_phrase.map(len)
phrases_with_keys = pd.concat(
    [
        n_local_keys_per_phrase.rename('n_local_keys'),
        local_keys_per_phrase.rename('local_keys'),
        phrases,
    ],
    axis=1,
)
phrases_with_keys.head(10).style.apply(
    color_background,
    subset=['n_local_keys', 'local_keys'],
)
      n_local_keys local_keys mc mn quarterbeats duration_qb mc_onset mn_onset timesig staff voice label alt_label globalkey localkey pedal chord numeral form figbass changes relativeroot cadence phraseend chord_type globalkey_is_minor localkey_is_minor chord_tones added_tones root bass_note volta special pedalend placement
corpus fname phrase_slice                                                                      
bach_solo BWV1009_01_Prelude [0.0, 18.25) 1 ('I',) 1 1 0 18.250000 0 0 3/4 1 1 C.I{ nan C I nan I I nan nan nan nan nan { M False False (0, 4, 1) () 0 0 nan nan nan nan
[18.25, 36.25) 1 ('I',) 7 7 73/4 18.000000 1/16 1/16 3/4 1 1 { nan C I nan nan nan nan nan nan nan nan { nan False False () () nan nan nan nan
[36.25, 78.0) 1 ('I',) 13 13 145/4 41.750000 1/16 1/16 3/4 1 1 { nan C I nan nan nan nan nan nan nan nan { nan False False () () nan nan nan nan
[78.0, 108.0) 1 ('I',) 27 27 78 30.000000 0 0 3/4 1 1 vi}{ nan C I nan vi vi nan nan nan nan nan }{ m False False (3, 0, 4) () 3 3 nan nan nan nan
[108.0, 180.25) 1 ('I',) 37 37 108 72.250000 0 0 3/4 1 1 I}{ nan C I nan I I nan nan nan nan nan }{ M False False (0, 4, 1) () 0 0 nan nan nan nan
[180.25, 210.25) 1 ('I',) 61 61 721/4 30.000000 1/16 1/16 3/4 1 1 { nan C I nan nan nan nan nan nan nan nan { nan False False () () nan nan nan nan
[210.25, 244.25) 1 ('I',) 71 71 841/4 34.000000 1/16 1/16 3/4 1 1 { nan C I nan nan nan nan nan nan nan nan { nan False False () () nan nan nan nan
[244.25, 264.0) 1 ('I',) 82 82 977/4 19.750000 5/16 5/16 3/4 1 1 { nan C I I nan nan nan nan nan nan nan { nan False False () () nan nan nan nan
BWV1009_02_Allemande [0.0, 15.75) 1 ('I',) 1 0 0 15.750000 0 13/16 4/4 1 1 C.V{ nan C I nan V V nan nan nan nan nan { M False False (1, 5, 2) () 1 1 nan nan nan nan
[15.75, 48.0) 2 ('I', 'V') 5 4 63/4 32.250000 3/4 3/4 4/4 1 1 { nan C I nan nan nan nan nan nan nan nan { nan False False () () nan nan nan nan

Number of unique local keys per phrase

# How many phrases contain 1, 2, 3, ... distinct local keys.
count_n_keys = (
    phrases_with_keys["n_local_keys"]
    .value_counts()
    .rename("#phrases")
    .to_frame()
    .rename_axis("unique keys")
)
count_n_keys
#phrases
unique keys
1 5422
2 1143
3 97
4 17
6 7
5 1
8 1

The most frequent keys for non-modulating phrases

# Restrict to phrases with a single local key and unpack it from its 1-tuple.
unique_key_selector = phrases_with_keys["n_local_keys"] == 1
phrases_with_unique_key = phrases_with_keys[unique_key_selector].copy()
phrases_with_unique_key["local_keys"] = phrases_with_unique_key["local_keys"].map(lambda keys: keys[0])
value_count_df(phrases_with_unique_key["local_keys"], counts="#phrases")
#phrases
local_keys
I 2450
i 1579
V 451
III 281
v 172
vi 125
IV 56
iii 55
VI 53
iv 39
VII 27
bVI 19
bIII 16
#V 13
ii 13
vi/V 10
#iii 8
#II 6
bII 6
bvi 5
#VII 4
iv/iv 4
bV 4
#iv 3
ii/VI 3
bII/V 2
bI 2
bIV 2
vii 2
II 2
bii 1
#III 1
bi 1
IV/IV 1
V/VII 1
bVII 1
#I 1
#vii 1
V/V 1
iii/bVI 1

Most frequent modulations within one phrase

# Select phrases with more than one local key (i.e. two or more).
# NOTE(review): `phrases_with_unique_key` is overwritten here and from now on
# holds the *modulating* phrases, despite its name.
two_keys_selector = phrases_with_keys["n_local_keys"] > 1
phrases_with_unique_key = phrases_with_keys.loc[two_keys_selector].copy()
value_count_df(phrases_with_unique_key["local_keys"], "modulations")
counts
modulations
(I, V) 135
(V, I) 97
(i, III) 90
(III, i) 79
(i, v) 62
(I, vi) 59
(vi, I) 56
(v, i) 51
(iv, i) 32
(i, I) 27
(III, v) 21
(I, iii) 19
(VI, i) 16
(i, iv) 16
(V, vi) 16
(I, i) 15
(IV, I) 14
(I, IV) 13
(III, iv) 13
(v, III) 12
(bIII, I) 11
(I, ii) 11
(iii, I) 10
(III, i, v) 9
(i, VI) 8
(I, bIII) 8
(III, I) 7
(I, III) 7
(V, I, vi) 7
(v, iv) 7
(ii, I) 6
(iv, v) 6
(I, bVI) 6
(vi, ii) 5
(i, ii) 4
(III, bV, iii, i) 4
(vi, iii) 4
(i, bIII) 4
(i, V) 4
(v, V) 4
(vi, V) 4
(ii, vi) 4
(VII, i) 3
(V, i) 3
(i, iii) 3
(bVI, I) 3
(v, I) 3
(III, i, iv) 3
(I, iv) 3
(bV, bVI, bvii) 3
(ii, iii) 3
(i, v, iv) 3
(V, iii) 3
(iv, III) 3
(I, #vi) 3
(I, #II) 3
(bIII, i) 3
(bVII, i, ii, iii, IV, I) 3
(vii, i) 2
(III, iv, v) 2
(bIII/bIII/V, I) 2
(bIII/V, bIII/bIII/V) 2
(V, bIII/V) 2
(VII, I) 2
(I, VII) 2
(iii, vi) 2
(iii, bv) 2
(VI, iv) 2
(I, bVII) 2
(#II, I) 2
(VI, bII) 2
(i, bI) 2
(vi, V, iii) 2
(IV, III, I) 2
(VI, I) 2
(i, #iii) 2
(iv, IV) 2
(VII, v) 2
(bVI, iii/bVI) 2
(V, ii) 2
(vi, IV) 2
(bIII, iv) 2
(vii, I) 2
(bIII, iv, i) 2
(v, v/v) 2
(III, V) 2
(iv/iv, bII, iv, VI, i, III) 2
(iv, iv/iv) 2
(VII, bIII, II) 2
(bII, i) 2
(v, IV) 2
(i, VII) 2
(I, #V) 2
(bII, bIII/bII) 1
(bi, iv, i) 1
(bv, i, bV, V, bii, VI) 1
(iii, bVII) 1
(#iii/ii, ii, bii, iii, iv, I) 1
(#V, V) 1
(III, V/III) 1
(VI, iv, I) 1
(IV, #vi, VII, iv, vi) 1
(VII, V, vi) 1
(V, #III, VII, iii, iv, bi, bii, I) 1
(vi, ii, #iii/ii) 1
(I, III, V) 1
(VII, III) 1
(#iii, bii/#iii, i) 1
(V, VII) 1
(iii, I, i) 1
(vi, #iv) 1
(ii, vi, i) 1
(iii, I, vi) 1
(#iv, bvi) 1
(VI, bIII/VI) 1
(#II, II, V) 1
(bII, iv, I, #III) 1
(#III, VII) 1
(VII, #VII, #IV) 1
(#IV, #III) 1
(#III, II) 1
(II, i) 1
(iv, VI) 1
(iv/iii, i) 1
(iv, bIII, v, I) 1
(iv, bIII, v, III/v) 1
(bVII, iii/iii, iii) 1
(bVII, I, II, III) 1
(#VII, i) 1
(I, bII, bIII) 1
(V, bI) 1
(bii, i) 1
(ii/VI, #VI) 1
(IV, ii, I) 1
(i, I, II) 1
(v, i, iv) 1
(i, iv, III) 1
(II, v) 1
(i, iv, VI) 1
(III, VII) 1
(v, VII/v) 1
(VI, bIII/VI, V, i) 1
(VII/v, v) 1
(V, bIII) 1
(bIII/bII, i) 1
(bv, i) 1
(i, bv) 1
(III, bii) 1
(#VI, I) 1
(i, III, vii, iv) 1
(VII, biii) 1
(V, vi, I) 1
(bvi, v, bv, I) 1
(V, biii, I) 1
(bIII, bVII) 1
(bIII, bvi, bV) 1
(bV, bbVII, I) 1
(I, bI, VI) 1
(iii, iv/iii) 1
(I, V, v) 1
(#iii, v/#iii) 1
(I, III, i) 1
(#VI, bV) 1
(bV, i) 1
(ii, vi, I) 1
(#VI, VI) 1
(VI, ii/VI) 1
(bIII/VI, VII, i) 1
(iv, I, V) 1
(#II, II, I) 1
(#VII, v) 1
(bII/V, iv) 1
(iv, bII) 1
(ii, v) 1
(IV, bII, I) 1
(iii, III) 1
(v, #iii) 1
(#iii, v) 1
(iv, bVI) 1
(bIII, i, bVI) 1
(bVI, i) 1
(iv, IV/IV, IV/IV/IV) 1
(IV/IV/IV, iv) 1
(iv, I) 1
(#iii, i) 1
(V, I, VI) 1
(VI, II) 1
(II, I) 1
(bIII, I, VI) 1
(III, VI) 1
(vi/V, I) 1
(I, vi/V) 1
(ii, IV) 1
(v/v/v, v, V) 1
(V, vi, ii) 1
(ii, I, vi) 1
(v, iv, III, i) 1
(V, v) 1
(bVI, iv) 1
(iv, i, I) 1
(I, vii) 1
(v/v, v/v/v) 1
(V, i, iv, V/V) 1
(i, vi) 1
(V/V, v, i) 1
(I, v) 1
(v/v, i) 1
(iii, vi, IV) 1
(vii, bII) 1
(bII, iv, i) 1
(IV, i) 1
(i, bII) 1
(III, iii) 1
(v, VI) 1
(iv, i, III) 1
(bVII, i) 1
(III, v, iv, i) 1
(vi, i) 1
(i, III, v) 1
(v, iv, VI, i) 1
(v, iv, III) 1
(i, VII, III) 1
(III, iv, v, i) 1
(I, IV, III) 1
(I, VI) 1
(bVII, IV) 1
(VI, vi) 1
(I, bVII, bVI) 1
(iii/i, i) 1
(i, #VI, III/#VI) 1
(iv, bIII) 1
(biii, bII, I) 1
(V, V/V) 1
(iii/V/VII, I) 1
(III, iv, VI) 1
(V, I, IV, vi) 1
(v, iv, i) 1
(ii, V, I) 1
(VI, VII) 1
(bVI, III) 1
(bII, vii, VI) 1
(vii, II) 1
(vii, iii) 1
(iii, vii, I) 1
(bIV, i) 1
(bIV, I) 1
(I, bIV) 1
(bII, I) 1
(vi, I, ii) 1
(IV, V) 1
(vi, I, v) 1
(v, vi, I) 1
(v, i, III) 1
(v, III, i) 1
(i, v, III) 1
(III, v, i) 1
(V, V/V, I) 1
(iii/bVI, I) 1

Cadences

Overall

  • PAC: Perfect Authentic Cadence

  • IAC: Imperfect Authentic Cadence

  • HC: Half Cadence

  • DC: Deceptive Cadence

  • EC: Evaded Cadence

  • PC: Plagal Cadence

# Emit the "Toggle Code" button again (same snippet as in the preliminaries).
# The embedded script hides all `div.input` elements once the document is ready
# and lets the button show/hide them.
HTML('''<script>
  function code_toggle() {
    if (code_shown){
      $('div.input').hide('500');
      $('#toggleButton').val('Show Code')
    } else {
      $('div.input').show('500');
      $('#toggleButton').val('Hide Code')
    }
    code_shown = !code_shown
  }
  $( document ).ready(function(){
    code_shown=false;
    $('div.input').hide()
  });
</script>
<form action="javascript:code_toggle()"><input type="submit" id="toggleButton" value="Show Code"></form>''')
# Total number of cadence labels, followed by the breakdown per cadence type.
print(f"{all_labels.cadence.notna().sum()} cadence labels.")
value_count_df(all_labels["cadence"])
5805 cadence labels.
counts
cadence
PAC 2809
HC 1599
IAC 1085
EC 137
DC 103
PC 72
# Pie chart over all rows that carry a cadence label, one slice per cadence type.
px.pie(
    all_labels[all_labels["cadence"].notna()],
    names="cadence",
    color="cadence",
    color_discrete_map=cadence_colors,
)

Per dataset

# Count the cadence types per corpus and normalise each corpus' counts by its total,
# so that the stacked bars show the share of every cadence type.
cadence_count_per_dataset = all_labels.groupby("corpus").cadence.value_counts()
cadence_fraction_per_dataset = cadence_count_per_dataset / cadence_count_per_dataset.groupby(level=0).sum()
# `category_orders` must be keyed by the column shown on the axis ('corpus');
# the former key 'dataset' matched no column, so the chronological order was ignored.
# NOTE(review): assumes `chronological_order` is a list of corpus names - confirm.
px.bar(cadence_fraction_per_dataset.rename('count').reset_index(), x='corpus', y='count', color='cadence',
      color_discrete_map=cadence_colors, category_orders=dict(corpus=chronological_order))
# One pie per corpus (four per row) showing the absolute cadence counts.
fig = px.pie(
    cadence_count_per_dataset.rename('count').reset_index(),
    names='cadence',
    color='cadence',
    values='count',
    facet_col='corpus',
    facet_col_wrap=4,
    height=2000,
    color_discrete_map=cadence_colors,
)
# Keep only the part after the last "=" in each facet title.
fig.for_each_annotation(lambda annotation: annotation.update(text=annotation.text.split("=")[-1]))
fig.update_layout(**STD_LAYOUT)

Per phrase

Number of cadences per phrase

# Per phrase: the number of distinct cadence types and the tuple of those
# types (missing values removed), prepended to the phrase/key table.
phrases_with_cadences = pd.concat(
    [
        phrase_gpb["cadence"].nunique().rename('n_cadences'),
        phrase_gpb["cadence"].unique().rename('cadences').map(
            lambda labels: tuple(label for label in labels if not pd.isnull(label))
        ),
        phrases_with_keys,
    ],
    axis=1,
)
value_count_df(phrases_with_cadences["n_cadences"], counts="#phrases")
#phrases
n_cadences
1 5344
0 1174
2 166
3 4
# Per corpus, count how many phrases contain 0, 1, 2, ... distinct cadence types.
n_cad = phrases_with_cadences.groupby(level='corpus').n_cadences.value_counts().rename('counts').reset_index().sort_values('n_cadences')
# Cast to string so that plotly treats the number of cadences as a discrete colour category.
n_cad.n_cadences = n_cad.n_cadences.astype(str)
# `category_orders` must be keyed by the column shown on the axis ('corpus');
# the former key 'dataset' matched no column, so the chronological order was ignored.
# NOTE(review): assumes `chronological_order` is a list of corpus names - confirm.
fig = px.bar(n_cad, x='corpus', y='counts', color='n_cadences', height=800, barmode='group',
             labels=dict(n_cadences="#cadences in a phrase"),
             category_orders=dict(corpus=chronological_order)
      )
fig.show()

Combinations of cadence types for phrases with more than one cadence

# Tabulate the cadence-type tuples of phrases containing more than one cadence type.
value_count_df(
    phrases_with_cadences.loc[phrases_with_cadences["n_cadences"] > 1, "cadences"]
)
counts
cadences
(DC, PAC) 40
(HC, PAC) 38
(EC, PAC) 35
(IAC, PAC) 12
(EC, HC) 11
(PAC, HC) 6
(DC, HC) 4
(EC, IAC) 3
(DC, IAC) 3
(PAC, IAC) 3
(PC, PAC) 3
(PAC, DC) 2
(HC, PC) 1
(IAC, HC, PAC) 1
(EC, PC) 1
(HC, DC, PAC) 1
(PAC, PC) 1
(IAC, HC) 1
(EC, DC, PAC) 1
(HC, DC) 1
(IAC, EC) 1
(DC, EC, IAC) 1

Positioning of cadences within phrases

# Build one scatter row per phrase (shortest phrase first): a marker for the
# phrase end plus one marker per cadence label, positioned relative to the phrase start.
df_rows = []
y_position = 0
for ix in phrases_with_cadences[phrases_with_cadences.n_cadences > 0].sort_values('duration_qb').index:
    # all segments belonging to this phrase
    df = phrase_segments.loc[ix]
    description = str(ix)
    if df.cadence.notna().any():
        # the third index element is the phrase's interval
        interval = ix[2]
        df_rows.append((y_position, interval.length, "end of phrase", description))
        start_pos = interval.left
        cadences = df.loc[df.cadence.notna(), ['quarterbeats', 'cadence']]
        # express cadence positions as offsets from the beginning of the phrase
        cadences.quarterbeats -= start_pos
        for cadence_x, cadence_type in cadences.itertuples(index=False, name=None):
            df_rows.append((y_position, cadence_x, cadence_type, description))
        # only phrases that contributed markers advance the row counter
        y_position += 1
    #else:
    #    df_rows.append((y_position, pd.NA, pd.NA, description))
data = pd.DataFrame(df_rows, columns=["phrase_ix", "x", "marker", "description"])
fig = px.scatter(data[data.x.notna()], x='x', y="phrase_ix", color="marker", hover_name="description", height=3000,
                labels=dict(marker='legend'), color_discrete_map=cadence_colors)
fig.update_traces(marker_size=5)
# reversed y axis: the first (shortest) phrase appears at the top
fig.update_yaxes(autorange="reversed")
fig.show()

Cadence ultima

# Emit the "Toggle Code" button again (same snippet as in the preliminaries).
# The embedded script hides all `div.input` elements once the document is ready
# and lets the button show/hide them.
HTML('''<script>
  function code_toggle() {
    if (code_shown){
      $('div.input').hide('500');
      $('#toggleButton').val('Show Code')
    } else {
      $('div.input').show('500');
      $('#toggleButton').val('Hide Code')
    }
    code_shown = !code_shown
  }
  $( document ).ready(function(){
    code_shown=false;
    $('div.input').hide()
  });
</script>
<form action="javascript:code_toggle()"><input type="submit" id="toggleButton" value="Show Code"></form>''')
# Fetch the "expanded" facet again, then re-expand the labels of rows that
# carry a cadence label but have no value in the `chord` column.
phrase_segments = segmented.get_facet("expanded")
cadence_selector = phrase_segments.cadence.notna()
missing_chord_selector = phrase_segments.chord.isna()
cadence_with_missing_chord_selector = cadence_selector & missing_chord_selector
#print(f"Ultima missing for {cadence_with_missing_chord_selector.sum()} cadences.")
# NOTE(review): `missing` is not used again in this cell - confirm whether later cells need it.
missing = phrase_segments[cadence_with_missing_chord_selector]
# Expand only the selected rows with ms3 (no propagation, with chord tones, checks skipped).
expanded = ms3.expand_dcml.expand_labels(phrase_segments[cadence_with_missing_chord_selector], propagate=False, chord_tones=True, skip_checks=True)
# Write the expanded rows back in place of the selected ones.
phrase_segments.loc[cadence_with_missing_chord_selector] = expanded
# Cadence labels that still have no `bass_note` afterwards are reported as missing ultimae.
print(f"Ultima harmony missing for {(phrase_segments.cadence.notna() & phrase_segments.bass_note.isna()).sum()} cadence labels.")
MC 49: #vii in major context corrected to vii.
Ultima harmony missing for 29 cadence labels.

Ultimae as Roman numeral

def highlight(row, color="#ffffb3"):
    """Row-wise styler: give rows with at least 10 counts a background colour.

    Intended for ``DataFrame.style.apply(highlight, axis=1)``.

    Parameters
    ----------
    row : pandas.Series
        One row of the styled frame; must expose a ``counts`` value.
    color : str
        CSS colour used as background for highlighted rows.

    Returns
    -------
    list
        One entry per cell of ``row``: ``None`` for all cells if
        ``row.counts < 10``, otherwise the CSS background declaration.
    """
    n_cells = len(row)
    if row.counts < 10:
        return [None] * n_cells
    # Use the `color` argument (it used to be ignored in favour of a hard-coded value).
    return [f"background-color: {color};"] * n_cells

# Overall cadence frequencies; their index fixes the order of the facet rows.
cadence_counts = all_labels["cadence"].value_counts()
# Count the numerals occurring with each cadence type, separately for major and minor local keys.
ultima_root = (
    phrase_segments
    .groupby(['localkey_is_minor', 'cadence'])["numeral"]
    .value_counts()
    .rename('counts')
    .to_frame()
    .reset_index()
)
ultima_root["localkey_is_minor"] = ultima_root["localkey_is_minor"].map({False: 'in major', True: 'in minor'})
#ultima_root.style.apply(highlight, axis=1)
fig = px.pie(
    ultima_root,
    names='numeral',
    values='counts',
    facet_row='cadence',
    facet_col='localkey_is_minor',
    height=1500,
    category_orders={'cadence': cadence_counts.index},
)
# Keep only the part after the last "=" in each facet title.
fig.for_each_annotation(lambda annotation: annotation.update(text=annotation.text.split("=")[-1]))
fig.update_traces(textposition='inside', textinfo='percent+label')
fig.update_layout(**STD_LAYOUT)
fig.show()
#phrase_segments.groupby(level=[0,1,2], group_keys=False).apply(lambda df: df if ((df.cadence == 'PAC') & (df.numeral == 'V')).any() else None)

Ultimae bass note as scale degree

# Count the bass notes occurring with each cadence type, separately for major and minor local keys.
ultima_bass = (
    phrase_segments
    .groupby(['localkey_is_minor', 'cadence'])["bass_note"]
    .value_counts()
    .rename('counts')
    .reset_index()
)
# Convert the bass notes to scale degrees with ms3.fifths2sd, taking the mode into account.
ultima_bass["bass_note"] = ms3.transform(
    ultima_bass,
    ms3.fifths2sd,
    dict(fifths='bass_note', minor='localkey_is_minor'),
)
ultima_bass["localkey_is_minor"] = ultima_bass["localkey_is_minor"].map({False: 'in major', True: 'in minor'})
#ultima_bass.style.apply(highlight, axis=1)
fig = px.pie(
    ultima_bass,
    names='bass_note',
    values='counts',
    facet_row='cadence',
    facet_col='localkey_is_minor',
    height=1500,
    category_orders={'cadence': cadence_counts.index},
)
# Keep only the part after the last "=" in each facet title.
fig.for_each_annotation(lambda annotation: annotation.update(text=annotation.text.split("=")[-1]))
fig.update_traces(textposition='inside', textinfo='percent+label')
fig.update_layout(**STD_LAYOUT)
fig.show()

Chord progressions

PACs with ultima I/i

#pac_on_i = phrase_segments.groupby(level=[0,1,2], group_keys=False).apply(lambda df: df if ((df.cadence == 'PAC') & (df.numeral.isin(('I', 'i')))).any() else None)
#pac_on_i.cadence.value_counts()
#pac_on_i.droplevel(-1).index.nunique()
def get_progressions(selected='PAC', last_row=None, feature='chord', dataset=None, as_series=True, segments=None):
    """Collect the sequences of ``feature`` values that lead up to cadences of one type.

    The segments are grouped by their first three index levels. Within each group, the rows
    are split into runs that each end on a row carrying a cadence label; rows after the last
    cadence label are discarded. Every run ending on ``selected`` yields one progression.

    Args:
        selected: Cadence label whose progressions are collected (compared with the
            ``cadence`` column).
        last_row: Optional mapping ``{column: value or tuple of values}``. A progression is
            kept only if, in the row carrying the cadence label, every given column holds
            one of the given values. ``None`` (default) applies no filter.
        feature: Column whose values make up the progressions. Rows where it is missing
            are ignored altogether.
        dataset: If given, groups whose first index level does not contain this value
            (``dataset not in corp``) are skipped.
        as_series: If True (default) return a pandas Series of tuples, otherwise a list.
        segments: DataFrame to analyse. Defaults to the notebook-level variable
            ``phrase_segments``. It needs a ``cadence`` column and an index with at least
            three levels.

    Returns:
        One tuple of ``feature`` values per matching cadence.
    """
    if segments is None:
        # Backward-compatible default: fall back to the notebook-level DataFrame.
        segments = phrase_segments
    allowed_values = {
        column: values if isinstance(values, tuple) else (values,)
        for column, values in (last_row or {}).items()
    }
    progressions = []

    labelled = segments[segments[feature].notna()]
    for (corp, *_), df in labelled.groupby(level=[0, 1, 2]):
        if dataset is not None and dataset not in corp:
            continue
        if not (df.cadence == selected).fillna(False).any():
            continue
        # Remove chords after the last cadence label. Series.bfill() replaces the
        # deprecated fillna(method='bfill').
        df = df[df.cadence.bfill().notna()]
        # A new run starts on the row following a cadence label. shift(fill_value=False)
        # keeps the boolean dtype instead of introducing a missing value that has to be
        # filled afterwards.
        cadence_groups = df.cadence.notna().shift(fill_value=False).cumsum()
        for _, run in df.groupby(cadence_groups):
            cadence_row = run.iloc[-1]
            if cadence_row.cadence != selected:
                continue
            if any(cadence_row[column] not in values for column, values in allowed_values.items()):
                continue
            progressions.append(tuple(run[feature]))
    if as_series:
        # An explicit object dtype keeps an empty result consistent with a non-empty one.
        return pd.Series(progressions, dtype=object)
    return progressions
# Chord-label progressions leading up to every 'PAC' label whose final row has numeral 'I' or 'i'.
chord_progressions = get_progressions('PAC', dict(numeral=('I', 'i')), 'chord')
print(f"Progressions for {len(chord_progressions)} cadences:")
# value_count_df is defined elsewhere in the notebook; presumably it tabulates how often
# each progression occurs (TODO confirm).
value_count_df(chord_progressions, "chord progressions")
Progressions for 2774 cadences:
counts
chord progressions
(V, V7, I, ii6(2), ii6, V7, I) 12
(I, V7, I) 12
(I, I6, IV, V(64), V, I) 10
(I, IV, V(64), V, I) 9
(i, VM7, i, V7, V7(#2), V7, i) 8
... ...
(i, iv, V, iv6, V, VI, i, iv, i, iv, iv6, V, i6, i, V, iv6, V, iv, iv7, V, i) 1
(i6, ii%65, V(4), V, i) 1
(i, V6, i, iv6(2), iv6, iio6, V, III, VII(4), VII, #viio, i(9), i, i6, ii%65, V(4), V, i) 1
(i, V6, V65, I, vi6, V6(2), V6, iii6, IV6(2), IV6, ii6, V7, vi, IVM7, V(4), V, I) 1
(I, V7/IV, IV, ii7, V, iii7, vi, IVM7, V, ii, V, V2, I6, vi%43, V43, I, V(4), V, I) 1

2320 rows × 1 columns

# Same selection as for the chord progressions (PACs ending on 'I' or 'i'), but collecting the
# 'numeral' column instead of the full chord label.
numeral_progressions = get_progressions('PAC', dict(numeral=('I', 'i')), 'numeral')
value_count_df(numeral_progressions, "numeral progressions")
counts
numeral progressions
(I, V, V, I) 21
(I, IV, V, V, I) 18
(I, V, I) 16
(I, V, I, V, I, V, I, V, I) 13
(I, I, IV, V, V, I) 13
... ...
(I, ii, I, I, ii, V, V, I) 1
(i, V, iv, ii, V, i, V, V, i) 1
(v, i, iv, V, III, VI, ii, V, i, iv, V, i, iv, ii, V, i, V, V, i) 1
(I, IV, I, V, V, I) 1
(I, #vii, i, V, i, v, iv, iv, ii, V, VI, ii, V, V, i) 1

2141 rows × 1 columns

def remove_immediate_duplicates(l):
    """Collapse runs of equal neighbouring elements into a single element.

    Args:
        l: Any iterable (tuple, list, generator, ...). Elements are compared with ``!=``.

    Returns:
        A tuple containing every element of ``l`` that differs from its immediate predecessor.
        The first element is always kept, even if it is ``None``.
    """
    # A private sentinel marks "no predecessor yet", so that a leading None in the data
    # is not mistaken for a duplicate.
    no_predecessor = object()
    previous = no_predecessor
    kept = []
    for element in l:
        if previous is no_predecessor or element != previous:
            kept.append(element)
        previous = element
    return tuple(kept)

# Collapse immediately repeated numerals within each progression before counting.
numeral_prog_no_dups = numeral_progressions.map(remove_immediate_duplicates)
value_count_df(numeral_prog_no_dups)
counts
(I, V, I) 56
(I, IV, V, I) 49
(I, V, I, V, I) 32
(I, V, I, ii, V, I) 24
(i, V, i, V, i) 23
... ...
(I, vii, vi, #vii, i) 1
(V, iii, IV, ii, V, I, IV, V, I, ii, V, I) 1
(I, IV, V, I, vi, V, IV, iii, I, ii, V, I) 1
(i, V, IV, V, VI, ii, V, i) 1
(i, V, i, V, i, v, vi, iv, V, i, ii, V, i) 1

1847 rows × 1 columns

PACs ending on scale degree 1

Scale degrees are expressed with respect to the major scale, regardless of the actual key.

# Bass-note progressions leading up to every 'PAC' label whose final row has bass_note 0.
bass_progressions = get_progressions('PAC', dict(bass_note=0), 'bass_note')
# Convert each tuple with ms3.fifths2sd; no `minor` argument is passed here, in contrast to
# the pie-chart cell above. Presumably this yields scale-degree strings (TODO confirm).
bass_prog = bass_progressions.map(ms3.fifths2sd)
print(f"Progressions for {len(bass_progressions)} cadences:")
value_count_df(bass_prog, "bass progressions")
Progressions for 2580 cadences:
counts
bass progressions
(1, 4, 5, 5, 1) 23
(1, 5, 1) 23
(1, 3, 4, 5, 5, 1) 17
(1, 5, 5, 1) 15
(5, 5, 1, 4, 4, 5, 1) 12
... ...
(1, 3, 4, 4, 4, b3, 5, 5, 1) 1
(5, 1, b6, 7, 5, b6, 4, 5, 1, 4, #7, 1, 4, 4, 4, b3, 5, 5, 1) 1
(1, 4, 1, 5, 5, 1) 1
(1, 6, 2, 5, 1, 4, 1, 5, 5, 1) 1
(5, 4, 1, 1, 2, 2, 3, 3, 4, 5, 1, 3, 4, 4, 5, 6, 3, 4, 5, 1) 1

1988 rows × 1 columns

# Collapse immediately repeated bass scale degrees within each progression before counting.
bass_prog_no_dups = bass_prog.map(remove_immediate_duplicates)
value_count_df(bass_prog_no_dups)
counts
(1, 5, 1) 50
(1, 5, 1, 5, 1) 38
(1, 4, 5, 1) 35
(1, 3, 4, 5, 1) 25
(5, 1, 4, 5, 1) 19
... ...
(1, #7, 1, 3, 4, 5, 1, 5, 4, 5, 1) 1
(1, b3, 1, 2, 5, 1) 1
(1, 6, #7, 1, #7, 6, 2, 5, 1, 4, #4, 5, 1) 1
(1, #7, 1, 3, 4, 5, b3, #7, 1, #7, 1) 1
(1, 4, 6, 5, 1, 3, 4, 5, 3, 4, 6, 3, 4, 5, 1) 1

1772 rows × 1 columns

def make_sankey(data, labels, node_pos=None, margin={'l': 10, 'r': 10, 'b': 10, 't': 10}, pad=20, color='auto', **kwargs):
    """Build a plotly Sankey figure from an edge table and a list of node labels.

    Args:
        data: Object with the attributes ``source``, ``target`` and ``value`` (e.g. a DataFrame
            with these columns); they are passed on as the Sankey links.
        labels: Node labels; the position in this sequence is the node index.
        node_pos: Optional mapping ``{node index: (x, y)}``. Nodes missing from the mapping are
            placed at 0. If None, no positions are passed to plotly.
        margin: Figure margins, passed to ``fig.update_layout``.
        pad: Passed on as the ``pad`` property of the Sankey nodes.
        color: Node colors passed to plotly, or ``'auto'`` to give every distinct label its own
            hue (the same label gets the same color wherever it occurs).
        **kwargs: Further layout properties passed to ``fig.update_layout``.

    Returns:
        The ``go.Figure`` containing the Sankey trace.
    """
    # Only compare strings with 'auto' so that color sequences/arrays are never compared elementwise.
    if isinstance(color, str) and color == 'auto':
        # dict.fromkeys keeps the order of first occurrence, so the label->hue assignment is
        # the same on every run (iterating over a set of strings is not reproducible).
        unique_labels = list(dict.fromkeys(labels))
        # Guard against empty input, which would otherwise raise ZeroDivisionError.
        color_step = 100 / len(unique_labels) if unique_labels else 0
        unique_colors = {label: f'hsv({round(i*color_step)}%,100%,100%)' for i, label in enumerate(unique_labels)}
        color = [unique_colors[label] for label in labels]
    if node_pos is None:
        node_x = node_y = None
    else:
        node_indices = range(len(labels))
        node_x = [node_pos[i][0] if i in node_pos else 0 for i in node_indices]
        node_y = [node_pos[i][1] if i in node_pos else 0 for i in node_indices]
    fig = go.Figure(go.Sankey(
        arrangement = 'snap',
        node = dict(
          pad = pad,
          #thickness = 20,
          #line = dict(color = "black", width = 0.5),
          label = labels,
          x = node_x,
          y = node_y,
          color = color,
          ),
        link = dict(
          source = data.source,
          target = data.target,
          value = data.value
          ),
        ),
     )

    fig.update_layout(margin=margin, **kwargs)
    return fig

def progressions2graph_data(progressions, cut_at_stage=None):
    """Turn progressions into the nodes and weighted edges of a stage-wise graph.

    Every progression is traversed backwards: its last element is stage 0, the element before
    it stage 1, and so on. Each distinct (stage, element) pair becomes one node with a running
    integer ID, so progressions sharing an ending share the corresponding nodes.

    Args:
        progressions: Iterable of reversible sequences (e.g. tuples).
        cut_at_stage: Highest stage to include. ``None`` (default) includes complete
            progressions; ``0`` keeps only the final elements (and therefore no edges).

    Returns:
        A pair ``(stage_nodes, edge_weights)``. ``stage_nodes`` maps
        ``stage -> {element: node ID}``. ``edge_weights`` is a Counter mapping
        ``(node of the earlier element, node of the following element)`` to the number of
        progressions containing that transition.
    """
    stage_nodes = defaultdict(dict)
    edge_weights = Counter()
    node_counter = 0
    for progression in progressions:
        successor = None
        for stage, element in enumerate(reversed(progression)):
            # Explicit None check: a truthiness test would treat cut_at_stage=0 as "no cut".
            if cut_at_stage is not None and stage > cut_at_stage:
                break
            nodes = stage_nodes[stage]
            if element not in nodes:
                nodes[element] = node_counter
                node_counter += 1
            node = nodes[element]
            if successor is not None:
                edge_weights[(node, successor)] += 1
            successor = node
    return stage_nodes, edge_weights

def graph_data2sankey(stage_nodes, edge_weights):
    """Create a Sankey figure from the output of ``progressions2graph_data``.

    Args:
        stage_nodes: Mapping ``stage -> {label: node ID}``; node IDs are expected to be the
            consecutive integers starting at 0.
        edge_weights: Mapping ``(source node, target node) -> weight``.

    Returns:
        Whatever ``make_sankey`` returns for the assembled edge table and label list.
    """
    edge_rows = [(source, target, weight) for (source, target), weight in edge_weights.items()]
    data = pd.DataFrame(edge_rows, columns = ['source', 'target', 'value'])
    # Invert the per-stage {label: node} dicts into a single {node: label} lookup.
    node2label = {}
    for nodes in stage_nodes.values():
        for label, node in nodes.items():
            node2label[node] = label
    # Order the labels by node ID, because make_sankey identifies nodes by list position.
    labels = [node2label[node] for node in range(len(node2label))]
    return make_sankey(data, labels)

def plot_progressions(progressions, cut_at_stage=None):
    """Plot progressions as a Sankey diagram.

    Chains ``progressions2graph_data`` and ``graph_data2sankey``; ``cut_at_stage`` is handed
    to the former unchanged.
    """
    graph_data = progressions2graph_data(progressions, cut_at_stage=cut_at_stage)
    return graph_data2sankey(*graph_data)

# Sankey diagram of the de-duplicated numeral progressions, limited to stages 0-3 counted
# backwards from the end of each progression.
plot_progressions(numeral_prog_no_dups, cut_at_stage=3)
# PACs whose final row has numeral 'i' and localkey_is_minor True.
# NOTE(review): despite the variable name, the collected feature is 'root', not 'chord'.
chord_progressions_minor = get_progressions('PAC', dict(numeral='i', localkey_is_minor=True), 'root')
chord_progressions_minor
0       (1, 0, 0, 0, 0, 2, 1, 0, -4, -1, 1, 0, -1, 5, ...
1       (0, -2, -3, -1, 5, 1, 1, 0, -2, -4, 1, -1, -3,...
2          (0, 5, 5, 0, 0, 2, -1, 1, 1, 1, 5, 0, 2, 1, 0)
3                         (1, 0, 1, 1, 1, 1, -1, 0, 1, 0)
4                                         (0, 1, 0, 5, 0)
                              ...
1001    (1, 1, 1, 0, 5, 0, 2, 2, -1, -1, 5, 1, 0, 2, 1...
1002                                     (-4, 6, 1, 1, 0)
1003    (1, 1, 5, 0, 2, 1, 1, 4, -1, -1, 2, 5, 0, -1, ...
1004    (1, 1, 1, 0, -1, -1, -1, -2, -2, -3, -3, -4, -...
1005                                     (0, -1, 1, 1, 0)
Length: 1006, dtype: object
# Chord progressions of PACs whose final row has numeral 'I' and localkey_is_minor False.
pac_major = get_progressions('PAC', dict(numeral='I', localkey_is_minor=False), 'chord')
plot_progressions(pac_major, cut_at_stage=4)
# Chord progressions leading to 'DC' labels whose final row has localkey_is_minor False.
deceptive = get_progressions('DC', dict(localkey_is_minor=False), 'chord')
deceptive.value_counts()
(I, V43(4), I6, IV, V7, vi)                                                                                                                                                                                                                                                                                       3
(i, viio43/V, V6, i, viio43/V, V6, I, IV6, I6, IV, viio6, I6, IV6, viio, I, ii65, V, vi)                                                                                                                                                                                                                          2
(I, IV, V/vi, vi, V, I, IV, V(64), V7, vi)                                                                                                                                                                                                                                                                        2
(I, V, ii7(9), V, V, ii7(9), V, V7, ii7(9), V, ii7, V, ii7(13), ii7, V7(#2), V7(6), vi)                                                                                                                                                                                                                           2
(i, iv/i, V(64)/i, V/i, I/i)                                                                                                                                                                                                                                                                                      2
(I, IV, V(64), V, i)                                                                                                                                                                                                                                                                                              2
(V43, I(4), V65, I, V2(6), V2, I6, #viio7/ii, iv64/ii, #viio65/ii, i6(6)/ii, #viio43/ii, i6/ii, vi, vii%2, vi, ii%43, I64, I6, V7/V, V7, V6/vi, vi)                                                                                                                                                               2
(I, V, I, V7, I, V6, I, viio/V, V, vi)                                                                                                                                                                                                                                                                            2
(IV6, V6, I(4), I, I6, IV, ii6, V7(4), V7, vi(^2), vi)                                                                                                                                                                                                                                                            2
(I, I6, IV, V(64), V, vi)                                                                                                                                                                                                                                                                                         1
(vi, I6, ii6, V, vi)                                                                                                                                                                                                                                                                                              1
(I, V6, vi, I6, ii6, V7, vi)                                                                                                                                                                                                                                                                                      1
(V65, I, V7, I, V65, I, V7, bVI)                                                                                                                                                                                                                                                                                  1
(I6, IV, V7, vi)                                                                                                                                                                                                                                                                                                  1
(V(4), V, V7, V64, V7, vi, I6, IV, ii6, V7, vi)                                                                                                                                                                                                                                                                   1
(V65, I, V7, I6, V65, I, V7, bVI)                                                                                                                                                                                                                                                                                 1
(V7, vi)                                                                                                                                                                                                                                                                                                          1
(i, V, i, V, i, V, VI/i, iv, V, VI/i, iv, V, VI/i, iv, V(64)/i, V, VI/i)                                                                                                                                                                                                                                          1
(I(4), I, V(4), V, IV6(112), IV6, V65(2), V6, I(9), I, ii7, viio6, I6, viio6, I, V65, I(9), I, V(4), V7, V(64), V43, I(4), I, V(4), V, V65, i(4))                                                                                                                                                                 1
(I, V6(4), V43/vi, vi, I6, ii6, V, V6/vi, vi, I6, ii6, V, V6/vi, vi)                                                                                                                                                                                                                                              1
(I, I6, ii6, V, vi)                                                                                                                                                                                                                                                                                               1
(I, V, I, IV6, ii, V7, bVI)                                                                                                                                                                                                                                                                                       1
(I, V7, I, V7, I, #viio7/ii, ii, V(64), V7, iv6)                                                                                                                                                                                                                                                                  1
(i6/iv, iv/iv, iio6/iv, V(64)/iv, V7/iv, bII)                                                                                                                                                                                                                                                                     1
(ii, V(64)/vi, It6/vi, V/vi, V7/V, V, V7/IV, IV, ii, V7(^9), V7, vi)                                                                                                                                                                                                                                              1
(#viio43/ii, ii, IV, V(64), V7, I)                                                                                                                                                                                                                                                                                1
(V(974), V, I, IV, V/vi, vi, I, IV, V(64), V7, vi)                                                                                                                                                                                                                                                                1
(I64, IV6, #viio2/vi, vi, vii%2, I, IV, V(64), V7, vi)                                                                                                                                                                                                                                                            1
(I, viio6, I6, IV, I, viio6, I6, IV, I, viio6, I6, IV, I, viio6, I6, viio/V, V, V(64), V7, V(64), V7, I, I6, ii6, V6/V, V(64), V(4), V7, I, I6, ii6, V6/V, V(64), V(4), V, bVI)                                                                                                                                   1
(V, V7, IV(+2), V7, vi)                                                                                                                                                                                                                                                                                           1
(I, V2, I, V2, I, V2, I, V2, I, V2, I6, I, viio6, viio, vi6, vi, V6, V, IV6, IV, iii6, iii, ii6, ii, I6, I, V6, IV6, iii6, ii6, I6, viio6, I, V, ii, vi, IV, I, V(4), V, I, V, ii, vi, I/IV, V2/IV, I/IV, V2/IV, I/IV, V7, V(64), V, V7, I, V6, IV6, iii6, ii6, I6, viio6, I, V6, IV6, iii6, ii6, I6, V7, bVI)    1
(vi, V65/IV, IV, V7, vi, ii6, V(64), V7, vi(64), vi)                                                                                                                                                                                                                                                              1
(V(64)/vi, viio65/V/vi, V/vi, iii, V43, I, V6, V7, #viio43/ii, V7, bVI)                                                                                                                                                                                                                                           1
(V, V2(9), iii, V43, I, #viio65/vii, V43, #viio2/iii, V7/IV, V+7/IV, IV, vii%43, V7/vi, V7(4+2)/vi, V7/vi, vi, IV(94), IV, I6, V43, I, IV(94), IV, I64, V(4), V, v, ii)                                                                                                                                           1
(I, ii2, I, I6, ii65, V(64), V, vi)                                                                                                                                                                                                                                                                               1
(I, I, V, I, vi, V/vi, vi, IV, V/IV, IV, ii, V/ii, ii, V, vi7)                                                                                                                                                                                                                                                    1
(I, V2, I6, IV, V6/V, V, V6/vi, i/vi, V(64)/vi, V/vi, VI/vi)                                                                                                                                                                                                                                                      1
(V2/V, V6, V2/IV, IV6, vi, ii, V7(^9), V7, I/bVI)                                                                                                                                                                                                                                                                 1
(I, V7, I, I6, viio6, vi6, V6, V7, I, IV64, I, V, V65, I, V, V65, I, ii6, V(64), V7, vi(64), vi)                                                                                                                                                                                                                  1
dtype: int64
# Sankey diagrams limited to stages 0-4 (deceptive cadences) and 0-7 (de-duplicated bass
# progressions), counted backwards from the end of each progression.
plot_progressions(deceptive, cut_at_stage=4)
plot_progressions(bass_prog_no_dups, cut_at_stage=7)
def remove_sd_accidentals(t):
    """Reduce every scale-degree string in ``t`` to its last character.

    This strips any prefix such as 'b' or '#' (e.g. 'b3' -> '3'). Returns a tuple.
    """
    return tuple(scale_degree[-1] for scale_degree in t)

# Strip the accidentals first and collapse repetitions afterwards, so that degrees differing
# only by accidental (e.g. '7' and '#7') are merged when adjacent.
bass_prog_no_acc_no_dup = bass_prog.map(remove_sd_accidentals).map(remove_immediate_duplicates)
plot_progressions(bass_prog_no_acc_no_dup, cut_at_stage=7)

HCs ending on V

# Bass-note progressions leading to 'HC' labels whose final row has numeral 'V', converted
# with ms3.fifths2sd (called without a `minor` argument).
half = get_progressions('HC', dict(numeral='V'), 'bass_note').map(ms3.fifths2sd)
print(f"Progressions for {len(half)} cadences:")
# Repetitions are collapsed only for the plot; `half` itself keeps them.
plot_progressions(half.map(remove_immediate_duplicates), cut_at_stage=5)
Progressions for 1541 cadences: